Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ ext {
// Platforms
grpcVersion = '1.51.0' // [1.34.0,)
jacksonVersion = '2.14.0' // [2.9.0,)
micrometerVersion = '1.9.5' // [1.0.0,)
micrometerVersion = '1.9.6' // [1.0.0,) // we don't upgrade to 1.10.x because it requires kotlin 1.6. Users may use 1.10.x in their environments though.

slf4jVersion = '1.7.36' // [1.4.0,) // stay on 1.x for a while to don't use any APIs from 2.x which may break our users which decide on 1.x
slf4jVersion = '1.7.36' // [1.4.0,) // stay on 1.x for a while to don't use any APIs from 2.x which may break our users which stay on 1.x
protoVersion = '3.21.9' // [3.10.0,)
annotationApiVersion = '1.3.2'
guavaVersion = '31.1-jre' // [10.0,)
Expand All @@ -46,7 +46,7 @@ ext {
springBootVersion = '2.7.5'// [2.4.0,)

// test scoped
logbackVersion = '1.2.11'
logbackVersion = '1.2.11' // we don't upgrade to 1.3 and 1.4 because they require slf4j 2.x
mockitoVersion = '4.9.0'
junitVersion = '4.13.2'
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,23 @@ public Builder setTaskQueue(String taskQueue) {
}

/**
* RetryOptions that define how activity is retried in case of failure. If this is not set, then
* the server-defined default activity retry policy will be used. To ensure zero retries, set
* maximum attempts to 1.
* RetryOptions that define how an Activity is retried in case of failure.
*
* <p>If not provided, the server-defined default activity retry policy will be used. If not
* overridden, the server default activity retry policy is:
*
* <pre><code>
* InitialInterval: 1 second
* BackoffCoefficient: 2
* MaximumInterval: 100 seconds // 100 * InitialInterval
* MaximumAttempts: 0 // Unlimited
* NonRetryableErrorTypes: []
* </pre></code>
*
* <p>If both {@link #setScheduleToCloseTimeout(Duration)} and {@link
* RetryOptions.Builder#setMaximumAttempts(int)} are not set, the Activity will not be retried.
*
* <p>To ensure zero retries, set {@link RetryOptions.Builder#setMaximumAttempts(int)} to 1.
*/
public Builder setRetryOptions(RetryOptions retryOptions) {
this.retryOptions = retryOptions;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,22 @@ public Builder mergeActivityOptions(LocalActivityOptions override) {
}

/**
* {@link RetryOptions} that define how an Activity is retried in case of failure. Activities
* use a default RetryPolicy if not provided.
* {@link RetryOptions} that define how an Activity is retried in case of failure.
*
* <p>If not provided, the default activity retry policy is:
*
* <pre><code>
* InitialInterval: 1 second
* BackoffCoefficient: 2
* MaximumInterval: 100 seconds // 100 * InitialInterval
* MaximumAttempts: 0 // Unlimited
* NonRetryableErrorTypes: []
* </pre></code>
*
* <p>If both {@link #setScheduleToCloseTimeout(Duration)} and {@link
* RetryOptions.Builder#setMaximumAttempts(int)} are not set, the Activity will not be retried.
*
* <p>To ensure zero retries, set {@link RetryOptions.Builder#setMaximumAttempts(int)} to 1.
*/
public Builder setRetryOptions(RetryOptions retryOptions) {
this.retryOptions = retryOptions;
Expand Down
25 changes: 14 additions & 11 deletions temporal-sdk/src/main/java/io/temporal/common/RetryOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

package io.temporal.common;

import com.google.common.base.Preconditions;
import io.temporal.failure.ActivityFailure;
import io.temporal.failure.ApplicationFailure;
import io.temporal.failure.CanceledFailure;
Expand All @@ -33,6 +34,7 @@
public final class RetryOptions {

private static final double DEFAULT_BACKOFF_COEFFICIENT = 2.0;
private static final Duration DEFAULT_INITIAL_INTERVAL = Duration.ofSeconds(1);
private static final int DEFAULT_MAXIMUM_MULTIPLIER = 100;

public static Builder newBuilder() {
Expand Down Expand Up @@ -130,8 +132,6 @@ private void validate() {

public static final class Builder {

private static final Duration DEFAULT_INITIAL_INTERVAL = Duration.ofSeconds(1);

private Duration initialInterval;

private double backoffCoefficient;
Expand Down Expand Up @@ -168,11 +168,12 @@ public Builder setInitialInterval(Duration initialInterval) {
/**
* Coefficient used to calculate the next retry interval. The next retry interval is previous
* interval multiplied by this coefficient. Must be 1 or larger. Default is 2.0.
*
* @throws IllegalArgumentException if {@code backoffCoefficient} is less than 1.0
*/
public Builder setBackoffCoefficient(double backoffCoefficient) {
if (backoffCoefficient < 1d) {
throw new IllegalArgumentException("coefficient less than 1.0: " + backoffCoefficient);
}
Preconditions.checkArgument(
backoffCoefficient >= 1, "backoffCoefficient must be >= 1, was %s", backoffCoefficient);
this.backoffCoefficient = backoffCoefficient;
return this;
}
Expand All @@ -182,11 +183,11 @@ public Builder setBackoffCoefficient(double backoffCoefficient) {
* Default is unlimited.
*
* @param maximumAttempts Maximum number of attempts. Default will be used if set to {@code 0}.
* @throws IllegalArgumentException if {@code maximumAttempts} is less than 0
*/
public Builder setMaximumAttempts(int maximumAttempts) {
if (maximumAttempts < 0) {
throw new IllegalArgumentException("Invalid maximumAttempts: " + maximumAttempts);
}
Preconditions.checkArgument(
maximumAttempts >= 0, "maximumAttempts must be >= 0, was %s", maximumAttempts);
this.maximumAttempts = maximumAttempts;
return this;
}
Expand All @@ -198,11 +199,13 @@ public Builder setMaximumAttempts(int maximumAttempts) {
*
* @param maximumInterval the maximum interval value. Default will be used if set to {@code
* null}.
* @throws IllegalArgumentException if {@code maximumInterval} is not null and not positive
*/
public Builder setMaximumInterval(Duration maximumInterval) {
if (maximumInterval != null && (maximumInterval.isNegative() || maximumInterval.isZero())) {
throw new IllegalArgumentException("Invalid maximum interval: " + maximumInterval);
}
Preconditions.checkArgument(
maximumInterval == null || maximumInterval.compareTo(Duration.ZERO) > 0,
"Invalid maximum interval: %s",
maximumInterval);
this.maximumInterval = maximumInterval;
return this;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved.
*
* Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Modifications copyright (C) 2017 Uber Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this material except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.temporal.internal.common;

import io.grpc.Deadline;
import io.temporal.api.common.v1.RetryPolicy;
import io.temporal.common.RetryOptions;
import io.temporal.failure.ActivityFailure;
import io.temporal.failure.ApplicationFailure;
import io.temporal.failure.ChildWorkflowFailure;
import java.time.Duration;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;

public class RetryOptionsUtils {
public static boolean isNotRetryable(RetryOptions o, @Nullable Throwable e) {
if (e == null) {
return false;
}
if (e instanceof ActivityFailure || e instanceof ChildWorkflowFailure) {
e = e.getCause();
}
String type =
e instanceof ApplicationFailure
? ((ApplicationFailure) e).getType()
: e.getClass().getName();
Comment on lines +41 to +44
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to set an explicit do not retry field in Java or is it only the string matching?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

String matching as it gets converted to protobuf's RetryPolicy, at least at this moment.

if (o.getDoNotRetry() != null) {
for (String doNotRetry : o.getDoNotRetry()) {
if (doNotRetry.equals(type)) {
return true;
}
}
}
return false;
}

public static boolean areAttemptsReached(RetryOptions o, long attempt) {
return (o.getMaximumAttempts() != 0 && attempt >= o.getMaximumAttempts());
}

public static boolean isDeadlineReached(@Nullable Deadline deadline, long sleepTimeMs) {
return deadline != null && deadline.timeRemaining(TimeUnit.MILLISECONDS) < sleepTimeMs;
}

public static RetryOptions toRetryOptions(RetryPolicy retryPolicy) {
RetryOptions.Builder roBuilder = RetryOptions.newBuilder();

Duration maximumInterval = ProtobufTimeUtils.toJavaDuration(retryPolicy.getMaximumInterval());
if (!maximumInterval.isZero()) {
roBuilder.setMaximumInterval(maximumInterval);
}

Duration initialInterval = ProtobufTimeUtils.toJavaDuration(retryPolicy.getInitialInterval());
if (!initialInterval.isZero()) {
roBuilder.setInitialInterval(initialInterval);
}

if (retryPolicy.getBackoffCoefficient() >= 1) {
roBuilder.setBackoffCoefficient(retryPolicy.getBackoffCoefficient());
}

if (retryPolicy.getMaximumAttempts() > 0) {
roBuilder.setMaximumAttempts(retryPolicy.getMaximumAttempts());
}

roBuilder.setDoNotRetry(
retryPolicy
.getNonRetryableErrorTypesList()
.toArray(new String[retryPolicy.getNonRetryableErrorTypesCount()]));

return roBuilder.validateBuildWithDefaults();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.BiFunction;

/**
* Implements workflow executor that relies on replay of a workflow code. An instance of this class
Expand All @@ -70,12 +69,12 @@ class ReplayWorkflowRunTaskHandler implements WorkflowRunTaskHandler {

private final Lock lock = new ReentrantLock();

private final Functions.Proc1<ActivityTaskHandler.Result> localActivityCompletionSink;
private final Functions.Proc1<LocalActivityResult> localActivityCompletionSink;

private final BlockingQueue<ActivityTaskHandler.Result> localActivityCompletionQueue =
private final BlockingQueue<LocalActivityResult> localActivityCompletionQueue =
new LinkedBlockingDeque<>();

private final BiFunction<LocalActivityTask, Duration, Boolean> localActivityTaskPoller;
private final LocalActivityDispatcher localActivityDispatcher;

private final ReplayWorkflow workflow;

Expand All @@ -93,15 +92,15 @@ class ReplayWorkflowRunTaskHandler implements WorkflowRunTaskHandler {
PollWorkflowTaskQueueResponseOrBuilder workflowTask,
SingleWorkerOptions workerOptions,
Scope metricsScope,
BiFunction<LocalActivityTask, Duration, Boolean> localActivityTaskPoller) {
LocalActivityDispatcher localActivityDispatcher) {
HistoryEvent startedEvent = workflowTask.getHistory().getEvents(0);
if (!startedEvent.hasWorkflowExecutionStartedEventAttributes()) {
throw new IllegalArgumentException(
"First event in the history is not WorkflowExecutionStarted");
}
this.startedEvent = startedEvent.getWorkflowExecutionStartedEventAttributes();
this.metricsScope = metricsScope;
this.localActivityTaskPoller = localActivityTaskPoller;
this.localActivityDispatcher = localActivityDispatcher;
this.workflow = workflow;

this.workflowStateMachines = new WorkflowStateMachines(new StatesMachinesCallbackImpl());
Expand Down Expand Up @@ -286,10 +285,10 @@ private void processLocalActivityRequests(long startTimeNs) throws InterruptedEx
// much sense. I believe we should add ScheduleToStart timeout for the local activities
// as well.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Todo now addressed?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope! #1512 Will be in a separate PR.

long maxWaitTimeNs = Math.max(nextWFTHeartbeatTimeNs - System.nanoTime(), 0);
boolean accepted =
localActivityTaskPoller.apply(
new LocalActivityTask(laRequest, localActivityCompletionSink),
Duration.ofNanos(maxWaitTimeNs));
laRequest.setScheduleToStartTimeout(Duration.ofNanos(maxWaitTimeNs));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can user not set sched-to-start timeout? (Honestly, makes sense)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's old logic. It will be gone in #1512.
We can discuss, but I don't see any "very good" solutions to not set scheduleToStart. I guess keep workflow tasks open will be expected by users if they have some unexpected backfilling in their systems. An alternative reasonable solution may be to set it to localRetryThreshold. The second approach is safer from the load control, but may be not expected by users.

boolean accepted = localActivityDispatcher.dispatch(laRequest, localActivityCompletionSink);
// TODO this needs to be reworked when we implement a proper scheduleToStart to report a
// Failure in the callback. A proper test is also needed for this scenario.
Preconditions.checkState(
accepted,
"Unable to schedule local activity for execution, "
Expand All @@ -302,7 +301,7 @@ private void processLocalActivityRequests(long startTimeNs) throws InterruptedEx
}

long maxWaitTimeTillHeartbeatNs = Math.max(nextWFTHeartbeatTimeNs - System.nanoTime(), 0);
ActivityTaskHandler.Result laCompletion =
LocalActivityResult laCompletion =
localActivityCompletionQueue.poll(maxWaitTimeTillHeartbeatNs, TimeUnit.NANOSECONDS);
if (laCompletion == null) {
// Need to force a new task as we are out of time
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -67,7 +66,7 @@ public final class ReplayWorkflowTaskHandler implements WorkflowTaskHandler {
private final Duration stickyTaskQueueScheduleToStartTimeout;
private final WorkflowServiceStubs service;
private final String stickyTaskQueueName;
private final BiFunction<LocalActivityTask, Duration, Boolean> localActivityTaskPoller;
private final LocalActivityDispatcher localActivityDispatcher;

public ReplayWorkflowTaskHandler(
String namespace,
Expand All @@ -77,15 +76,15 @@ public ReplayWorkflowTaskHandler(
String stickyTaskQueueName,
Duration stickyTaskQueueScheduleToStartTimeout,
WorkflowServiceStubs service,
BiFunction<LocalActivityTask, Duration, Boolean> localActivityTaskPoller) {
LocalActivityDispatcher localActivityDispatcher) {
this.namespace = namespace;
this.workflowFactory = asyncWorkflowFactory;
this.cache = cache;
this.options = options;
this.stickyTaskQueueName = stickyTaskQueueName;
this.stickyTaskQueueScheduleToStartTimeout = stickyTaskQueueScheduleToStartTimeout;
this.service = Objects.requireNonNull(service);
this.localActivityTaskPoller = localActivityTaskPoller;
this.localActivityDispatcher = localActivityDispatcher;
}

@Override
Expand Down Expand Up @@ -365,7 +364,7 @@ private WorkflowRunTaskHandler createStatefulHandler(
}
ReplayWorkflow workflow = workflowFactory.getWorkflow(workflowType);
return new ReplayWorkflowRunTaskHandler(
namespace, workflow, workflowTask, options, metricsScope, localActivityTaskPoller);
namespace, workflow, workflowTask, options, metricsScope, localActivityDispatcher);
}

private void resetStickyTaskQueue(WorkflowExecution execution) {
Expand Down
Loading