fix(sj_1.1.0-beta2): 1. 修复MAP任务失败 2. 获取配置的reduce 分片数
This commit is contained in:
parent
2d3ada1634
commit
ca2ada3875
@ -17,5 +17,6 @@ public class CompleteJobBatchDTO {
|
|||||||
private Long taskBatchId;
|
private Long taskBatchId;
|
||||||
private Integer jobOperationReason;
|
private Integer jobOperationReason;
|
||||||
private Object result;
|
private Object result;
|
||||||
|
private Integer taskType;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,16 @@
|
|||||||
|
package com.aizuda.snailjob.server.job.task.dto;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author opensnail
|
||||||
|
* @date 2024-06-25 22:58:05
|
||||||
|
* @since sj_1.1.0
|
||||||
|
*/
|
||||||
|
@Data
|
||||||
|
public class MapReduceArgsStrDTO {
|
||||||
|
|
||||||
|
private Integer shardNum;
|
||||||
|
|
||||||
|
private String argsStr;
|
||||||
|
}
|
@ -1,10 +1,15 @@
|
|||||||
package com.aizuda.snailjob.server.job.task.support.block.job;
|
package com.aizuda.snailjob.server.job.task.support.block.job;
|
||||||
|
|
||||||
|
import akka.actor.ActorRef;
|
||||||
import cn.hutool.core.collection.CollUtil;
|
import cn.hutool.core.collection.CollUtil;
|
||||||
import cn.hutool.core.lang.Assert;
|
import cn.hutool.core.lang.Assert;
|
||||||
import com.aizuda.snailjob.common.core.enums.JobTaskStatusEnum;
|
import com.aizuda.snailjob.common.core.enums.JobTaskStatusEnum;
|
||||||
|
import com.aizuda.snailjob.common.core.util.StreamUtils;
|
||||||
import com.aizuda.snailjob.common.log.SnailJobLog;
|
import com.aizuda.snailjob.common.log.SnailJobLog;
|
||||||
|
import com.aizuda.snailjob.server.common.akka.ActorGenerator;
|
||||||
|
import com.aizuda.snailjob.server.common.enums.JobTaskExecutorSceneEnum;
|
||||||
import com.aizuda.snailjob.server.common.exception.SnailJobServerException;
|
import com.aizuda.snailjob.server.common.exception.SnailJobServerException;
|
||||||
|
import com.aizuda.snailjob.server.job.task.dto.TaskExecuteDTO;
|
||||||
import com.aizuda.snailjob.server.job.task.enums.BlockStrategyEnum;
|
import com.aizuda.snailjob.server.job.task.enums.BlockStrategyEnum;
|
||||||
import com.aizuda.snailjob.server.job.task.support.JobExecutor;
|
import com.aizuda.snailjob.server.job.task.support.JobExecutor;
|
||||||
import com.aizuda.snailjob.server.job.task.support.JobTaskConverter;
|
import com.aizuda.snailjob.server.job.task.support.JobTaskConverter;
|
||||||
@ -19,6 +24,7 @@ import lombok.RequiredArgsConstructor;
|
|||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 重新触发执行失败的任务
|
* 重新触发执行失败的任务
|
||||||
@ -29,7 +35,7 @@ import java.util.List;
|
|||||||
*/
|
*/
|
||||||
@Component
|
@Component
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class DiscardRetryBlockStrategy extends AbstracJobBlockStrategy {
|
public class RecoveryBlockStrategy extends AbstracJobBlockStrategy {
|
||||||
private final JobTaskMapper jobTaskMapper;
|
private final JobTaskMapper jobTaskMapper;
|
||||||
private final JobMapper jobMapper;
|
private final JobMapper jobMapper;
|
||||||
@Override
|
@Override
|
||||||
@ -42,18 +48,26 @@ public class DiscardRetryBlockStrategy extends AbstracJobBlockStrategy {
|
|||||||
new LambdaQueryWrapper<JobTask>()
|
new LambdaQueryWrapper<JobTask>()
|
||||||
.select(JobTask::getId, JobTask::getTaskStatus)
|
.select(JobTask::getId, JobTask::getTaskStatus)
|
||||||
.eq(JobTask::getTaskBatchId, context.getTaskBatchId())
|
.eq(JobTask::getTaskBatchId, context.getTaskBatchId())
|
||||||
.eq(JobTask::getTaskStatus, JobTaskStatusEnum.NOT_SUCCESS)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// 若任务项为空则生成任务项
|
||||||
if (CollUtil.isEmpty(jobTasks)) {
|
if (CollUtil.isEmpty(jobTasks)) {
|
||||||
SnailJobLog.LOCAL.warn("No executable job task. taskBatchId:[{}]", context.getTaskBatchId());
|
TaskExecuteDTO taskExecuteDTO = new TaskExecuteDTO();
|
||||||
|
taskExecuteDTO.setTaskBatchId(context.getTaskBatchId());
|
||||||
|
taskExecuteDTO.setJobId(context.getJobId());
|
||||||
|
taskExecuteDTO.setTaskExecutorScene(JobTaskExecutorSceneEnum.MANUAL_JOB.getType());
|
||||||
|
taskExecuteDTO.setWorkflowTaskBatchId(context.getWorkflowTaskBatchId());
|
||||||
|
taskExecuteDTO.setWorkflowNodeId(context.getWorkflowNodeId());
|
||||||
|
ActorRef actorRef = ActorGenerator.jobTaskExecutorActor();
|
||||||
|
actorRef.tell(taskExecuteDTO, actorRef);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Job job = jobMapper.selectById(context.getJobId());
|
Job job = jobMapper.selectById(context.getJobId());
|
||||||
// 执行任务
|
// 执行任务 Stop or Fail 任务
|
||||||
JobExecutor jobExecutor = JobExecutorFactory.getJobExecutor(context.getTaskType());
|
JobExecutor jobExecutor = JobExecutorFactory.getJobExecutor(context.getTaskType());
|
||||||
jobExecutor.execute(buildJobExecutorContext(context, job, jobTasks));
|
jobExecutor.execute(buildJobExecutorContext(context, job,
|
||||||
|
StreamUtils.filter(jobTasks, (jobTask) -> JobTaskStatusEnum.NOT_SUCCESS.contains(jobTask.getTaskStatus()))));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
@ -51,6 +51,7 @@ public class JobExecutorResultActor extends AbstractActor {
|
|||||||
Assert.notNull(result.getTaskId(), ()-> new SnailJobServerException("taskId can not be null"));
|
Assert.notNull(result.getTaskId(), ()-> new SnailJobServerException("taskId can not be null"));
|
||||||
Assert.notNull(result.getJobId(), ()-> new SnailJobServerException("jobId can not be null"));
|
Assert.notNull(result.getJobId(), ()-> new SnailJobServerException("jobId can not be null"));
|
||||||
Assert.notNull(result.getTaskBatchId(), ()-> new SnailJobServerException("taskBatchId can not be null"));
|
Assert.notNull(result.getTaskBatchId(), ()-> new SnailJobServerException("taskBatchId can not be null"));
|
||||||
|
Assert.notNull(result.getTaskType(), ()-> new SnailJobServerException("taskType can not be null"));
|
||||||
|
|
||||||
JobTask jobTask = new JobTask();
|
JobTask jobTask = new JobTask();
|
||||||
jobTask.setTaskStatus(result.getTaskStatus());
|
jobTask.setTaskStatus(result.getTaskStatus());
|
||||||
@ -63,9 +64,6 @@ public class JobExecutorResultActor extends AbstractActor {
|
|||||||
new LambdaUpdateWrapper<JobTask>().eq(JobTask::getId, result.getTaskId())),
|
new LambdaUpdateWrapper<JobTask>().eq(JobTask::getId, result.getTaskId())),
|
||||||
() -> new SnailJobServerException("更新任务实例失败"));
|
() -> new SnailJobServerException("更新任务实例失败"));
|
||||||
|
|
||||||
// 更新工作流的全局上下文 如果并发更新失败则需要自旋重试更新
|
|
||||||
// workflowBatchHandler.mergeWorkflowContextAndRetry(result.getWorkflowTaskBatchId(), result.getWfContext());
|
|
||||||
|
|
||||||
// 除MAP和MAP_REDUCE 任务之外,其他任务都是叶子节点
|
// 除MAP和MAP_REDUCE 任务之外,其他任务都是叶子节点
|
||||||
if (Objects.nonNull(result.getIsLeaf()) && StatusEnum.NO.getStatus().equals(result.getIsLeaf())) {
|
if (Objects.nonNull(result.getIsLeaf()) && StatusEnum.NO.getStatus().equals(result.getIsLeaf())) {
|
||||||
return;
|
return;
|
||||||
|
@ -16,6 +16,7 @@ import com.aizuda.snailjob.server.common.cache.CacheRegisterTable;
|
|||||||
import com.aizuda.snailjob.server.common.dto.RegisterNodeInfo;
|
import com.aizuda.snailjob.server.common.dto.RegisterNodeInfo;
|
||||||
import com.aizuda.snailjob.server.common.exception.SnailJobServerException;
|
import com.aizuda.snailjob.server.common.exception.SnailJobServerException;
|
||||||
import com.aizuda.snailjob.server.common.util.ClientInfoUtils;
|
import com.aizuda.snailjob.server.common.util.ClientInfoUtils;
|
||||||
|
import com.aizuda.snailjob.server.job.task.dto.MapReduceArgsStrDTO;
|
||||||
import com.aizuda.snailjob.server.job.task.support.JobTaskConverter;
|
import com.aizuda.snailjob.server.job.task.support.JobTaskConverter;
|
||||||
import com.aizuda.snailjob.template.datasource.persistence.mapper.JobTaskMapper;
|
import com.aizuda.snailjob.template.datasource.persistence.mapper.JobTaskMapper;
|
||||||
import com.aizuda.snailjob.template.datasource.persistence.po.JobTask;
|
import com.aizuda.snailjob.template.datasource.persistence.po.JobTask;
|
||||||
@ -43,6 +44,8 @@ import java.util.*;
|
|||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class MapReduceTaskGenerator extends AbstractJobTaskGenerator {
|
public class MapReduceTaskGenerator extends AbstractJobTaskGenerator {
|
||||||
|
|
||||||
|
private static final String MERGE_REDUCE_TASK = "MERGE_REDUCE_TASK";
|
||||||
|
private static final String REDUCE_TASK = "REDUCE_TASK";
|
||||||
private final JobTaskMapper jobTaskMapper;
|
private final JobTaskMapper jobTaskMapper;
|
||||||
private final TransactionTemplate transactionTemplate;
|
private final TransactionTemplate transactionTemplate;
|
||||||
|
|
||||||
@ -53,7 +56,6 @@ public class MapReduceTaskGenerator extends AbstractJobTaskGenerator {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<JobTask> doGenerate(final JobTaskGenerateContext context) {
|
protected List<JobTask> doGenerate(final JobTaskGenerateContext context) {
|
||||||
// TODO 若没有客户端节点JobTask是否需要创建????
|
|
||||||
Set<RegisterNodeInfo> serverNodes = CacheRegisterTable.getServerNodeSet(context.getGroupName(),
|
Set<RegisterNodeInfo> serverNodes = CacheRegisterTable.getServerNodeSet(context.getGroupName(),
|
||||||
context.getNamespaceId());
|
context.getNamespaceId());
|
||||||
if (CollUtil.isEmpty(serverNodes)) {
|
if (CollUtil.isEmpty(serverNodes)) {
|
||||||
@ -64,9 +66,7 @@ public class MapReduceTaskGenerator extends AbstractJobTaskGenerator {
|
|||||||
List<RegisterNodeInfo> nodeInfoList = new ArrayList<>(serverNodes);
|
List<RegisterNodeInfo> nodeInfoList = new ArrayList<>(serverNodes);
|
||||||
MapReduceStageEnum mapReduceStageEnum = MapReduceStageEnum.ofStage(context.getMrStage());
|
MapReduceStageEnum mapReduceStageEnum = MapReduceStageEnum.ofStage(context.getMrStage());
|
||||||
Assert.notNull(mapReduceStageEnum, () -> new SnailJobServerException("Map reduce stage is not existed"));
|
Assert.notNull(mapReduceStageEnum, () -> new SnailJobServerException("Map reduce stage is not existed"));
|
||||||
|
switch (Objects.requireNonNull(mapReduceStageEnum)) {
|
||||||
// todo 待优化
|
|
||||||
switch (mapReduceStageEnum) {
|
|
||||||
case MAP -> {
|
case MAP -> {
|
||||||
// MAP任务
|
// MAP任务
|
||||||
return createMapJobTasks(context, nodeInfoList, serverNodes);
|
return createMapJobTasks(context, nodeInfoList, serverNodes);
|
||||||
@ -105,7 +105,7 @@ public class MapReduceTaskGenerator extends AbstractJobTaskGenerator {
|
|||||||
jobTask.setTaskStatus(JobTaskStatusEnum.RUNNING.getStatus());
|
jobTask.setTaskStatus(JobTaskStatusEnum.RUNNING.getStatus());
|
||||||
jobTask.setResultMessage(Optional.ofNullable(jobTask.getResultMessage()).orElse(StrUtil.EMPTY));
|
jobTask.setResultMessage(Optional.ofNullable(jobTask.getResultMessage()).orElse(StrUtil.EMPTY));
|
||||||
jobTask.setMrStage(MapReduceStageEnum.MERGE_REDUCE.getStage());
|
jobTask.setMrStage(MapReduceStageEnum.MERGE_REDUCE.getStage());
|
||||||
jobTask.setTaskName("MERGE_REDUCE_TASK");
|
jobTask.setTaskName(MERGE_REDUCE_TASK);
|
||||||
Assert.isTrue(1 == jobTaskMapper.insert(jobTask),
|
Assert.isTrue(1 == jobTaskMapper.insert(jobTask),
|
||||||
() -> new SnailJobServerException("新增任务实例失败"));
|
() -> new SnailJobServerException("新增任务实例失败"));
|
||||||
|
|
||||||
@ -115,8 +115,15 @@ public class MapReduceTaskGenerator extends AbstractJobTaskGenerator {
|
|||||||
private List<JobTask> createReduceJobTasks(JobTaskGenerateContext context, List<RegisterNodeInfo> nodeInfoList,
|
private List<JobTask> createReduceJobTasks(JobTaskGenerateContext context, List<RegisterNodeInfo> nodeInfoList,
|
||||||
Set<RegisterNodeInfo> serverNodes) {
|
Set<RegisterNodeInfo> serverNodes) {
|
||||||
|
|
||||||
// TODO reduce阶段的并行度
|
int reduceParallel = 1;
|
||||||
int reduceParallel = 2;
|
String jobParams = null;
|
||||||
|
try {
|
||||||
|
MapReduceArgsStrDTO mapReduceArgsStrDTO = JsonUtil.parseObject(context.getArgsStr(), MapReduceArgsStrDTO.class);
|
||||||
|
reduceParallel = Optional.ofNullable(mapReduceArgsStrDTO.getShardNum()).orElse(1);
|
||||||
|
jobParams = mapReduceArgsStrDTO.getArgsStr();
|
||||||
|
} catch (Exception e) {
|
||||||
|
SnailJobLog.LOCAL.error("map reduce args parse error. argsStr:[{}]", context.getArgsStr());
|
||||||
|
}
|
||||||
|
|
||||||
List<JobTask> jobTasks = jobTaskMapper.selectList(new LambdaQueryWrapper<JobTask>()
|
List<JobTask> jobTasks = jobTaskMapper.selectList(new LambdaQueryWrapper<JobTask>()
|
||||||
.select(JobTask::getResultMessage)
|
.select(JobTask::getResultMessage)
|
||||||
@ -132,6 +139,7 @@ public class MapReduceTaskGenerator extends AbstractJobTaskGenerator {
|
|||||||
|
|
||||||
jobTasks = new ArrayList<>(partition.size());
|
jobTasks = new ArrayList<>(partition.size());
|
||||||
final List<JobTask> finalJobTasks = jobTasks;
|
final List<JobTask> finalJobTasks = jobTasks;
|
||||||
|
String finalJobParams = jobParams;
|
||||||
transactionTemplate.execute(new TransactionCallbackWithoutResult() {
|
transactionTemplate.execute(new TransactionCallbackWithoutResult() {
|
||||||
@Override
|
@Override
|
||||||
protected void doInTransactionWithoutResult(final TransactionStatus status) {
|
protected void doInTransactionWithoutResult(final TransactionStatus status) {
|
||||||
@ -142,13 +150,13 @@ public class MapReduceTaskGenerator extends AbstractJobTaskGenerator {
|
|||||||
jobTask.setClientInfo(ClientInfoUtils.generate(registerNodeInfo));
|
jobTask.setClientInfo(ClientInfoUtils.generate(registerNodeInfo));
|
||||||
jobTask.setArgsType(context.getArgsType());
|
jobTask.setArgsType(context.getArgsType());
|
||||||
JobArgsHolder jobArgsHolder = new JobArgsHolder();
|
JobArgsHolder jobArgsHolder = new JobArgsHolder();
|
||||||
jobArgsHolder.setJobParams(StrUtil.isBlank(context.getArgsStr()) ? null : context.getArgsStr());
|
jobArgsHolder.setJobParams(finalJobParams);
|
||||||
jobArgsHolder.setMaps(JsonUtil.toJsonString(partition.get(index)));
|
jobArgsHolder.setMaps(JsonUtil.toJsonString(partition.get(index)));
|
||||||
jobTask.setArgsStr(JsonUtil.toJsonString(jobArgsHolder));
|
jobTask.setArgsStr(JsonUtil.toJsonString(jobArgsHolder));
|
||||||
jobTask.setTaskStatus(JobTaskStatusEnum.RUNNING.getStatus());
|
jobTask.setTaskStatus(JobTaskStatusEnum.RUNNING.getStatus());
|
||||||
jobTask.setResultMessage(Optional.ofNullable(jobTask.getResultMessage()).orElse(StrUtil.EMPTY));
|
jobTask.setResultMessage(Optional.ofNullable(jobTask.getResultMessage()).orElse(StrUtil.EMPTY));
|
||||||
jobTask.setMrStage(MapReduceStageEnum.REDUCE.getStage());
|
jobTask.setMrStage(MapReduceStageEnum.REDUCE.getStage());
|
||||||
jobTask.setTaskName("REDUCE_TASK");
|
jobTask.setTaskName(REDUCE_TASK);
|
||||||
jobTask.setParentId(0L);
|
jobTask.setParentId(0L);
|
||||||
jobTask.setRetryCount(0);
|
jobTask.setRetryCount(0);
|
||||||
jobTask.setLeaf(StatusEnum.YES.getStatus());
|
jobTask.setLeaf(StatusEnum.YES.getStatus());
|
||||||
|
@ -95,12 +95,10 @@ public class JobTaskBatchHandler {
|
|||||||
} else if (stopCount > 0) {
|
} else if (stopCount > 0) {
|
||||||
jobTaskBatch.setTaskBatchStatus(JobTaskBatchStatusEnum.STOP.getStatus());
|
jobTaskBatch.setTaskBatchStatus(JobTaskBatchStatusEnum.STOP.getStatus());
|
||||||
} else {
|
} else {
|
||||||
// todo 调试完成删除
|
|
||||||
SnailJobLog.LOCAL.info("尝试完成任务. taskBatchId:[{}] [{}]", completeJobBatchDTO.getTaskBatchId(),
|
|
||||||
JsonUtil.toJsonString(jobTasks));
|
|
||||||
|
|
||||||
jobTaskBatch.setTaskBatchStatus(JobTaskBatchStatusEnum.SUCCESS.getStatus());
|
jobTaskBatch.setTaskBatchStatus(JobTaskBatchStatusEnum.SUCCESS.getStatus());
|
||||||
if (needReduceTask(completeJobBatchDTO, jobTasks)) {
|
if (needReduceTask(completeJobBatchDTO, jobTasks)
|
||||||
|
&& JobTaskTypeEnum.MAP_REDUCE.getType() == completeJobBatchDTO.getTaskType()) {
|
||||||
// 此时中断批次完成,需要开启reduce任务
|
// 此时中断批次完成,需要开启reduce任务
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user