Line data Source code
1 : /*
2 : *
3 : * Copyright (c) 2020-2021 Project CHIP Authors
4 : * All rights reserved.
5 : *
6 : * Licensed under the Apache License, Version 2.0 (the "License");
7 : * you may not use this file except in compliance with the License.
8 : * You may obtain a copy of the License at
9 : *
10 : * http://www.apache.org/licenses/LICENSE-2.0
11 : *
12 : * Unless required by applicable law or agreed to in writing, software
13 : * distributed under the License is distributed on an "AS IS" BASIS,
14 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 : * See the License for the specific language governing permissions and
16 : * limitations under the License.
17 : */
18 :
19 : /**
20 : * @file
21 : * This file contains the implementation of the OperationalSessionSetup
22 : * class. Objects of this class are used by Controller applications to
23 : * interact with CHIP devices. The class provides a mechanism to construct,
24 : * send and receive messages to and from the corresponding CHIP devices.
25 : */
26 :
27 : #include <app/OperationalSessionSetup.h>
28 :
29 : #include <app/CASEClient.h>
30 : #include <app/InteractionModelEngine.h>
31 : #include <transport/SecureSession.h>
32 :
33 : #include <lib/address_resolve/AddressResolve.h>
34 : #include <lib/core/CHIPCore.h>
35 : #include <lib/core/CHIPEncoding.h>
36 : #include <lib/dnssd/Resolver.h>
37 : #include <lib/support/CodeUtils.h>
38 : #include <lib/support/logging/CHIPLogging.h>
39 : #include <system/SystemClock.h>
40 : #include <system/SystemLayer.h>
41 : #include <tracing/metric_event.h>
42 :
43 : using namespace chip::Callback;
44 : using chip::AddressResolve::NodeLookupRequest;
45 : using chip::AddressResolve::Resolver;
46 : using chip::AddressResolve::ResolveResult;
47 : using namespace chip::Tracing;
48 :
49 : namespace chip {
50 :
51 0 : void OperationalSessionSetup::MoveToState(State aTargetState)
52 : {
53 0 : if (mState != aTargetState)
54 : {
55 0 : ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d",
56 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState),
57 : to_underlying(aTargetState));
58 :
59 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
60 0 : if (mState == State::WaitingForRetry)
61 : {
62 0 : CancelSessionSetupReattempt();
63 : }
64 : #endif
65 :
66 0 : mState = aTargetState;
67 :
68 0 : if (aTargetState != State::Connecting)
69 : {
70 0 : CleanupCASEClient();
71 : }
72 : }
73 0 : }
74 :
75 0 : bool OperationalSessionSetup::AttachToExistingSecureSession()
76 : {
77 0 : VerifyOrReturnError(mState == State::NeedsAddress || mState == State::ResolvingAddress || mState == State::HasAddress ||
78 : mState == State::WaitingForRetry,
79 : false);
80 :
81 0 : auto sessionHandle = mInitParams.sessionManager->FindSecureSessionForNode(
82 0 : mPeerId, MakeOptional(Transport::SecureSession::Type::kCASE), mTransportPayloadCapability);
83 0 : if (!sessionHandle.HasValue())
84 0 : return false;
85 :
86 0 : ChipLogProgress(Discovery, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(),
87 : ChipLogValueX64(mPeerId.GetNodeId()));
88 :
89 0 : mDeviceAddress = sessionHandle.Value()->AsSecureSession()->GetPeerAddress();
90 0 : if (!mSecureSession.Grab(sessionHandle.Value()))
91 0 : return false;
92 :
93 0 : return true;
94 0 : }
95 :
96 0 : void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
97 : Callback::Callback<OnDeviceConnectionFailure> * onFailure,
98 : Callback::Callback<OnSetupFailure> * onSetupFailure,
99 : TransportPayloadCapability transportPayloadCapability)
100 : {
101 0 : CHIP_ERROR err = CHIP_NO_ERROR;
102 0 : bool isConnected = false;
103 :
104 0 : mTransportPayloadCapability = transportPayloadCapability;
105 : //
106 : // Always enqueue our user provided callbacks into our callback list.
107 : // If anything goes wrong below, we'll trigger failures (including any queued from
108 : // a previous iteration which in theory shouldn't happen, but this is written to be more defensive)
109 : //
110 0 : EnqueueConnectionCallbacks(onConnection, onFailure, onSetupFailure);
111 :
112 0 : switch (mState)
113 : {
114 0 : case State::Uninitialized:
115 0 : err = CHIP_ERROR_INCORRECT_STATE;
116 0 : break;
117 :
118 0 : case State::NeedsAddress:
119 0 : isConnected = AttachToExistingSecureSession();
120 0 : if (!isConnected)
121 : {
122 : // LookupPeerAddress could perhaps call back with a result
123 : // synchronously, so do our state update first.
124 0 : MoveToState(State::ResolvingAddress);
125 0 : err = LookupPeerAddress();
126 0 : if (err != CHIP_NO_ERROR)
127 : {
128 : // Roll back the state change, since we are presumably not in
129 : // the middle of a lookup.
130 0 : MoveToState(State::NeedsAddress);
131 : }
132 : }
133 :
134 0 : break;
135 :
136 0 : case State::ResolvingAddress:
137 : case State::WaitingForRetry:
138 0 : isConnected = AttachToExistingSecureSession();
139 0 : break;
140 :
141 0 : case State::HasAddress:
142 0 : isConnected = AttachToExistingSecureSession();
143 0 : if (!isConnected)
144 : {
145 : // We should not actually every be in be in State::HasAddress. This
146 : // is because in the same call that we moved to State::HasAddress
147 : // we either move to State::Connecting or call
148 : // DequeueConnectionCallbacks with an error thus releasing
149 : // ourselves before any call would reach this section of code.
150 0 : err = CHIP_ERROR_INCORRECT_STATE;
151 : }
152 :
153 0 : break;
154 :
155 0 : case State::Connecting:
156 0 : break;
157 :
158 0 : case State::SecureConnected:
159 0 : isConnected = true;
160 0 : break;
161 :
162 0 : default:
163 0 : err = CHIP_ERROR_INCORRECT_STATE;
164 : }
165 :
166 0 : if (isConnected)
167 : {
168 0 : MoveToState(State::SecureConnected);
169 : }
170 :
171 : //
172 : // Dequeue all our callbacks on either encountering an error
173 : // or if we successfully connected. Both should not be set
174 : // simultaneously.
175 : //
176 0 : if (err != CHIP_NO_ERROR || isConnected)
177 : {
178 0 : DequeueConnectionCallbacks(err);
179 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
180 : // While it is odd to have an explicit return here at the end of the function, we do so
181 : // as a precaution in case someone later on adds something to the end of this function.
182 0 : return;
183 : }
184 : }
185 :
186 0 : void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
187 : Callback::Callback<OnDeviceConnectionFailure> * onFailure,
188 : TransportPayloadCapability transportPayloadCapability)
189 : {
190 0 : Connect(onConnection, onFailure, nullptr, transportPayloadCapability);
191 0 : }
192 :
193 0 : void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
194 : Callback::Callback<OnSetupFailure> * onSetupFailure,
195 : TransportPayloadCapability transportPayloadCapability)
196 : {
197 0 : Connect(onConnection, nullptr, onSetupFailure, transportPayloadCapability);
198 0 : }
199 :
200 0 : void OperationalSessionSetup::UpdateDeviceData(const ResolveResult & result)
201 : {
202 0 : auto & config = result.mrpRemoteConfig;
203 0 : auto addr = result.address;
204 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
205 : // Make sure to clear out our reason for trying the next result first thing,
206 : // so it does not stick around in various error cases.
207 0 : bool tryingNextResultDueToSessionEstablishmentError = mTryingNextResultDueToSessionEstablishmentError;
208 0 : mTryingNextResultDueToSessionEstablishmentError = false;
209 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
210 :
211 0 : if (mState == State::Uninitialized)
212 : {
213 0 : return;
214 : }
215 :
216 : #if CHIP_DETAIL_LOGGING
217 : char peerAddrBuff[Transport::PeerAddress::kMaxToStringSize];
218 0 : addr.ToString(peerAddrBuff);
219 :
220 0 : ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: Updating device address to %s while in state %d",
221 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), peerAddrBuff, static_cast<int>(mState));
222 : #endif
223 :
224 0 : mDeviceAddress = addr;
225 :
226 : // Initialize CASE session state with any MRP parameters that DNS-SD has provided.
227 : // It can be overridden by CASE session protocol messages that include MRP parameters.
228 0 : if (mCASEClient)
229 : {
230 0 : mCASEClient->SetRemoteMRPIntervals(config);
231 : }
232 :
233 0 : if (mState != State::ResolvingAddress)
234 : {
235 0 : ChipLogError(Discovery, "Received UpdateDeviceData in incorrect state");
236 0 : DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
237 : // Do not touch `this` instance anymore; it has been destroyed in
238 : // DequeueConnectionCallbacks.
239 0 : return;
240 : }
241 :
242 0 : MoveToState(State::HasAddress);
243 0 : mInitParams.sessionManager->UpdateAllSessionsPeerAddress(mPeerId, addr);
244 :
245 0 : if (mPerformingAddressUpdate)
246 : {
247 : // Nothing else to do here.
248 0 : DequeueConnectionCallbacks(CHIP_NO_ERROR);
249 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
250 0 : return;
251 : }
252 :
253 0 : CHIP_ERROR err = EstablishConnection(result);
254 0 : LogErrorOnFailure(err);
255 0 : if (err == CHIP_NO_ERROR)
256 : {
257 : // We expect to get a callback via OnSessionEstablished or OnSessionEstablishmentError to continue
258 : // the state machine forward.
259 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
260 0 : if (tryingNextResultDueToSessionEstablishmentError)
261 : {
262 : // Our retry has already been kicked off, so claim 0 delay until it
263 : // starts. We only reach this from OnSessionEstablishmentError when
264 : // the error is CHIP_ERROR_TIMEOUT.
265 0 : NotifyRetryHandlers(CHIP_ERROR_TIMEOUT, config, System::Clock::kZero);
266 : }
267 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
268 0 : return;
269 : }
270 :
271 : // Move to the ResolvingAddress state, in case we have more results,
272 : // since we expect to receive results in that state. Pretend like we moved
273 : // on directly to this address from whatever triggered us to try this result
274 : // (so restore mTryingNextResultDueToSessionEstablishmentError to the value
275 : // it had at the start of this function).
276 0 : MoveToState(State::ResolvingAddress);
277 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
278 0 : mTryingNextResultDueToSessionEstablishmentError = tryingNextResultDueToSessionEstablishmentError;
279 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
280 0 : if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle))
281 : {
282 : // No need to NotifyRetryHandlers, since we never actually spent any
283 : // time trying the previous result. Whatever work we need to do has
284 : // been handled by our recursive OnNodeAddressResolved callback. Make
285 : // sure not to touch `this` under here, because it might have been
286 : // deleted by OnNodeAddressResolved.
287 0 : return;
288 : }
289 :
290 : // No need to reset mTryingNextResultDueToSessionEstablishmentError here,
291 : // because we're about to delete ourselves.
292 :
293 0 : DequeueConnectionCallbacks(err);
294 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
295 : }
296 :
297 0 : CHIP_ERROR OperationalSessionSetup::EstablishConnection(const ResolveResult & result)
298 : {
299 0 : auto config = result.mrpRemoteConfig;
300 :
301 0 : if (result.isICDOperatingAsLIT)
302 : {
303 : // When an ICD operates as a LIT, the DNS-SD advertisement lacks the Session Idle Interval
304 : // (SII). This would cause mIdleRetransTimeout to be 0, which is not a usable value. Since
305 : // CASE is established with LIT ICDs only when they are active, we can base
306 : // mIdleRetransTimeout on active mode parameters. To ensure sufficient time for MRP
307 : // retransmissions, particularly in Thread networks where mActiveRetransTimeout might be too
308 : // small, we use the maximum of config.mActiveRetransTimeout and
309 : // mInitParams.minimumLITBackoffInterval
310 :
311 0 : config.mIdleRetransTimeout =
312 0 : std::max(config.mActiveRetransTimeout, System::Clock::Milliseconds32(mInitParams.minimumLITBackoffInterval.ValueOr(0)));
313 : }
314 : #if INET_CONFIG_ENABLE_TCP_ENDPOINT
315 0 : if (mTransportPayloadCapability == TransportPayloadCapability::kLargePayload)
316 : {
317 0 : if (result.supportsTcpServer)
318 : {
319 : // Set the transport type for carrying large payloads
320 0 : mDeviceAddress.SetTransportType(chip::Transport::Type::kTcp);
321 : }
322 : else
323 : {
324 : // we should not set the large payload while the TCP support is not enabled
325 0 : ChipLogError(
326 : Discovery,
327 : "LargePayload session requested but peer does not support TCP server, PeerNodeId=" ChipLogFormatScopedNodeId,
328 : ChipLogValueScopedNodeId(mPeerId));
329 0 : return CHIP_ERROR_INTERNAL;
330 : }
331 : }
332 : #endif
333 :
334 0 : mCASEClient = mClientPool->Allocate();
335 0 : VerifyOrReturnError(mCASEClient != nullptr, CHIP_ERROR_NO_MEMORY);
336 :
337 : MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession);
338 0 : CHIP_ERROR err = mCASEClient->EstablishSession(mInitParams, mPeerId, mDeviceAddress, config, this);
339 0 : if (err != CHIP_NO_ERROR)
340 : {
341 : MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
342 0 : CleanupCASEClient();
343 0 : return err;
344 : }
345 :
346 0 : MoveToState(State::Connecting);
347 :
348 0 : return CHIP_NO_ERROR;
349 : }
350 :
351 0 : void OperationalSessionSetup::EnqueueConnectionCallbacks(Callback::Callback<OnDeviceConnected> * onConnection,
352 : Callback::Callback<OnDeviceConnectionFailure> * onFailure,
353 : Callback::Callback<OnSetupFailure> * onSetupFailure)
354 : {
355 0 : mCallbacks.Enqueue(onConnection, onFailure, onSetupFailure);
356 0 : }
357 :
358 0 : void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error, SessionEstablishmentStage stage,
359 : ReleaseBehavior releaseBehavior)
360 : {
361 : // We expect that we only have callbacks if we are not performing just address update.
362 0 : VerifyOrDie(!mPerformingAddressUpdate || mCallbacks.IsEmpty());
363 :
364 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
365 : // Clear out mConnectionRetry, so that those cancelables are not holding
366 : // pointers to us, since we're about to go away.
367 0 : while (auto * cb = mConnectionRetry.First())
368 : {
369 0 : cb->Cancel();
370 0 : }
371 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
372 :
373 : // Gather up state we will need for our notifications.
374 0 : SuccessFailureCallbackList readyCallbacks;
375 0 : readyCallbacks.EnqueueTakeAll(mCallbacks);
376 0 : auto * exchangeMgr = mInitParams.exchangeMgr;
377 0 : Optional<SessionHandle> optionalSessionHandle = mSecureSession.Get();
378 0 : ScopedNodeId peerId = mPeerId;
379 0 : System::Clock::Milliseconds16 requestedBusyDelay =
380 : #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
381 : mRequestedBusyDelay;
382 : #else
383 : System::Clock::kZero;
384 : #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
385 :
386 0 : if (releaseBehavior == ReleaseBehavior::Release)
387 : {
388 0 : VerifyOrDie(mReleaseDelegate != nullptr);
389 0 : mReleaseDelegate->ReleaseSession(this);
390 : }
391 :
392 : // DO NOT touch any members of this object after this point. It's dead.
393 0 : NotifyConnectionCallbacks(readyCallbacks, error, stage, peerId, exchangeMgr, optionalSessionHandle, requestedBusyDelay);
394 0 : }
395 :
396 0 : void OperationalSessionSetup::NotifyConnectionCallbacks(SuccessFailureCallbackList & ready, CHIP_ERROR error,
397 : SessionEstablishmentStage stage, const ScopedNodeId & peerId,
398 : Messaging::ExchangeManager * exchangeMgr,
399 : const Optional<SessionHandle> & optionalSessionHandle,
400 : System::Clock::Milliseconds16 requestedBusyDelay)
401 : {
402 : Callback::Callback<OnDeviceConnected> * onConnected;
403 : Callback::Callback<OnDeviceConnectionFailure> * onConnectionFailure;
404 : Callback::Callback<OnSetupFailure> * onSetupFailure;
405 0 : while (ready.Take(onConnected, onConnectionFailure, onSetupFailure))
406 : {
407 0 : if (error == CHIP_NO_ERROR)
408 : {
409 0 : VerifyOrDie(exchangeMgr);
410 0 : VerifyOrDie(optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession());
411 0 : if (onConnected != nullptr)
412 : {
413 0 : onConnected->mCall(onConnected->mContext, *exchangeMgr, optionalSessionHandle.Value());
414 :
415 : // That sucessful call might have made the session inactive. If it did, then we should
416 : // not call any more success callbacks, since we do not in fact have an active session
417 : // for them, and if they try to put the session in a holder that will fail, and then
418 : // trying to use the holder as if it has a session will crash.
419 0 : if (!optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession())
420 : {
421 0 : ChipLogError(Discovery, "Success callback for connection to " ChipLogFormatScopedNodeId " tore down session",
422 : ChipLogValueScopedNodeId(peerId));
423 0 : error = CHIP_ERROR_CONNECTION_ABORTED;
424 : }
425 : }
426 : }
427 : else // error
428 : {
429 0 : if (onConnectionFailure != nullptr)
430 : {
431 0 : onConnectionFailure->mCall(onConnectionFailure->mContext, peerId, error);
432 : }
433 0 : if (onSetupFailure != nullptr)
434 : {
435 0 : ConnectionFailureInfo failureInfo(peerId, error, stage);
436 : #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
437 0 : if (error == CHIP_ERROR_BUSY)
438 : {
439 0 : failureInfo.requestedBusyDelay.Emplace(requestedBusyDelay);
440 : }
441 : #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
442 0 : onSetupFailure->mCall(onSetupFailure->mContext, failureInfo);
443 : }
444 : }
445 : }
446 0 : }
447 :
448 0 : void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage)
449 : {
450 0 : VerifyOrReturn(mState == State::Connecting,
451 : ChipLogError(Discovery, "OnSessionEstablishmentError was called while we were not connecting"));
452 :
453 : // If this condition ever changes, we may need to store the error in a
454 : // member instead of having a boolean
455 : // mTryingNextResultDueToSessionEstablishmentError, so we can recover the
456 : // error in UpdateDeviceData.
457 0 : if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error)
458 : {
459 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
460 : // Make a copy of the ReliableMessageProtocolConfig, since our
461 : // mCaseClient is about to go away once we change state.
462 0 : ReliableMessageProtocolConfig remoteMprConfig = mCASEClient->GetRemoteMRPIntervals();
463 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
464 :
465 : // Move to the ResolvingAddress state, in case we have more results,
466 : // since we expect to receive results in that state.
467 0 : MoveToState(State::ResolvingAddress);
468 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
469 0 : mTryingNextResultDueToSessionEstablishmentError = true;
470 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
471 0 : if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle))
472 : {
473 : // Whatever work we needed to do has been handled by our
474 : // OnNodeAddressResolved callback. Make sure not to touch `this`
475 : // under here, because it might have been deleted by
476 : // OnNodeAddressResolved.
477 0 : return;
478 : }
479 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
480 0 : mTryingNextResultDueToSessionEstablishmentError = false;
481 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
482 :
483 : // Moving back to the Connecting state would be a bit of a lie, since we
484 : // don't have an mCASEClient. Just go back to NeedsAddress, since
485 : // that's really where we are now.
486 0 : MoveToState(State::NeedsAddress);
487 :
488 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
489 0 : if (mRemainingAttempts > 0)
490 : {
491 : System::Clock::Seconds16 reattemptDelay;
492 0 : CHIP_ERROR err = ScheduleSessionSetupReattempt(reattemptDelay);
493 0 : if (err == CHIP_NO_ERROR)
494 : {
495 0 : MoveToState(State::WaitingForRetry);
496 0 : NotifyRetryHandlers(error, remoteMprConfig, reattemptDelay);
497 0 : return;
498 : }
499 : }
500 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
501 : }
502 :
503 : // Session failed to be established. This is when discovery is also stopped
504 : MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, error);
505 : MATTER_LOG_METRIC_END(kMetricDeviceCASESession, error);
506 :
507 0 : DequeueConnectionCallbacks(error, stage);
508 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
509 : }
510 :
511 0 : void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay)
512 : {
513 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES || CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
514 : // Store the requested delay, so that we can use it for scheduling our
515 : // retry or communicate it to our API consumer.
516 0 : mRequestedBusyDelay = requestedDelay;
517 : #endif
518 0 : }
519 :
520 0 : void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
521 : {
522 0 : VerifyOrReturn(mState == State::Connecting,
523 : ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting"));
524 :
525 : // Session has been established. This is when discovery is also stopped
526 : MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, CHIP_NO_ERROR);
527 :
528 : MATTER_LOG_METRIC_END(kMetricDeviceCASESession, CHIP_NO_ERROR);
529 :
530 0 : if (!mSecureSession.Grab(session))
531 : {
532 : // Got an invalid session, just dispatch an error. We have to do this
533 : // so we don't leak.
534 0 : DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
535 :
536 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
537 0 : return;
538 : }
539 :
540 0 : MoveToState(State::SecureConnected);
541 :
542 0 : DequeueConnectionCallbacks(CHIP_NO_ERROR);
543 : }
544 :
545 0 : void OperationalSessionSetup::CleanupCASEClient()
546 : {
547 0 : if (mCASEClient)
548 : {
549 0 : mClientPool->Release(mCASEClient);
550 0 : mCASEClient = nullptr;
551 : }
552 0 : }
553 :
554 0 : OperationalSessionSetup::~OperationalSessionSetup()
555 : {
556 0 : if (mAddressLookupHandle.IsActive())
557 : {
558 0 : ChipLogDetail(Discovery,
559 : "OperationalSessionSetup[%u:" ChipLogFormatX64
560 : "]: Cancelling incomplete address resolution as device is being deleted.",
561 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()));
562 :
563 : // Skip cancel callback since the destructor is being called, so we assume that this object is
564 : // obviously not used anymore
565 0 : CHIP_ERROR err = Resolver::Instance().CancelLookup(mAddressLookupHandle, Resolver::FailureCallback::Skip);
566 0 : if (err != CHIP_NO_ERROR)
567 : {
568 0 : ChipLogError(Discovery, "Lookup cancel failed: %" CHIP_ERROR_FORMAT, err.Format());
569 : }
570 : }
571 :
572 0 : if (mCASEClient)
573 : {
574 : // Make sure we don't leak it.
575 0 : mClientPool->Release(mCASEClient);
576 : }
577 :
578 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
579 0 : CancelSessionSetupReattempt();
580 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
581 :
582 0 : DequeueConnectionCallbacks(CHIP_ERROR_CANCELLED, ReleaseBehavior::DoNotRelease);
583 0 : }
584 :
585 0 : CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
586 : {
587 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
588 0 : if (mRemainingAttempts > 0)
589 : {
590 0 : --mRemainingAttempts;
591 : }
592 0 : if (mAttemptsDone < UINT8_MAX)
593 : {
594 0 : ++mAttemptsDone;
595 : }
596 0 : if (mResolveAttemptsAllowed > 0)
597 : {
598 0 : --mResolveAttemptsAllowed;
599 : }
600 : MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
601 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
602 :
603 : // NOTE: This is public API that can be used to update our stored peer
604 : // address even when we are in State::Connected, so we do not make any
605 : // MoveToState calls in this method.
606 0 : if (mAddressLookupHandle.IsActive())
607 : {
608 0 : ChipLogProgress(Discovery,
609 : "OperationalSessionSetup[%u:" ChipLogFormatX64
610 : "]: Operational node lookup already in progress. Will NOT start a new one.",
611 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()));
612 0 : return CHIP_NO_ERROR;
613 : }
614 :
615 : // This code can be reached multiple times, if we discover multiple addresses or do retries.
616 : // The metric backend can handle this and always picks the earliest occurrence as the start of the event.
617 : MATTER_LOG_METRIC_BEGIN(kMetricDeviceOperationalDiscovery);
618 :
619 0 : auto const * fabricInfo = mInitParams.fabricTable->FindFabricWithIndex(mPeerId.GetFabricIndex());
620 0 : VerifyOrReturnError(fabricInfo != nullptr, CHIP_ERROR_INVALID_FABRIC_INDEX);
621 :
622 0 : PeerId peerId(fabricInfo->GetCompressedFabricId(), mPeerId.GetNodeId());
623 :
624 0 : NodeLookupRequest request(peerId);
625 :
626 0 : return Resolver::Instance().LookupNode(request, mAddressLookupHandle);
627 : }
628 :
629 0 : void OperationalSessionSetup::PerformAddressUpdate()
630 : {
631 0 : if (mPerformingAddressUpdate)
632 : {
633 : // We are already in the middle of a lookup from a previous call to
634 : // PerformAddressUpdate. In that case we will just exit right away as
635 : // we are already looking to update the results from the previous lookup.
636 0 : return;
637 : }
638 :
639 : // We must be newly-allocated to handle this address lookup, so must be in the NeedsAddress state.
640 0 : VerifyOrDie(mState == State::NeedsAddress);
641 :
642 : // We are doing an address lookup whether we have an active session for this peer or not.
643 0 : mPerformingAddressUpdate = true;
644 0 : MoveToState(State::ResolvingAddress);
645 0 : CHIP_ERROR err = LookupPeerAddress();
646 0 : if (err != CHIP_NO_ERROR)
647 : {
648 0 : ChipLogError(Discovery, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format());
649 0 : DequeueConnectionCallbacks(err);
650 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
651 0 : return;
652 : }
653 : }
654 :
655 0 : void OperationalSessionSetup::OnNodeAddressResolved(const PeerId & peerId, const ResolveResult & result)
656 : {
657 0 : UpdateDeviceData(result);
658 0 : }
659 :
660 0 : void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerId, CHIP_ERROR reason)
661 : {
662 0 : ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT,
663 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format());
664 :
665 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
666 : // If we're in a mode where we would generally retry CASE, retry operational
667 : // discovery if we're allowed to. That allows us to more-gracefully handle broken networks
668 : // where multicast DNS does not actually work and hence only the initial
669 : // unicast DNS-SD queries get a response.
670 : //
671 : // We check for State::ResolvingAddress just in case in the meantime
672 : // something weird happened and we are no longer trying to resolve an
673 : // address.
674 0 : if (mState == State::ResolvingAddress && mResolveAttemptsAllowed > 0)
675 : {
676 0 : ChipLogProgress(Discovery, "Retrying operational DNS-SD discovery. Attempts remaining: %u", mResolveAttemptsAllowed);
677 :
678 : // Pretend like our previous attempt (i.e. call to LookupPeerAddress)
679 : // has not happened for purposes of the generic attempt counters, so we
680 : // don't mess up the counters for our actual CASE retry logic.
681 0 : if (mRemainingAttempts < UINT8_MAX)
682 : {
683 0 : ++mRemainingAttempts;
684 : }
685 0 : if (mAttemptsDone > 0)
686 : {
687 0 : --mAttemptsDone;
688 : }
689 :
690 : MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
691 :
692 0 : CHIP_ERROR err = LookupPeerAddress();
693 0 : if (err == CHIP_NO_ERROR)
694 : {
695 : // We need to notify our consumer that the resolve will take more
696 : // time, but we don't actually know how much time it will take,
697 : // because the resolver does not expose that information. Just use
698 : // one minute to be safe.
699 : using namespace chip::System::Clock::Literals;
700 0 : NotifyRetryHandlers(reason, 60_s16);
701 0 : return;
702 : }
703 : }
704 : #endif
705 :
706 : MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, reason);
707 :
708 : // No need to modify any variables in `this` since call below releases `this`.
709 0 : DequeueConnectionCallbacks(reason);
710 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
711 : }
712 :
713 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
714 0 : void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount)
715 : {
716 0 : if (attemptCount == 0)
717 : {
718 : // Nothing to do.
719 0 : return;
720 : }
721 :
722 0 : if (mState != State::NeedsAddress)
723 : {
724 : // We're in the middle of an attempt already, so decrement attemptCount
725 : // by 1 to account for that.
726 0 : --attemptCount;
727 : }
728 :
729 0 : if (attemptCount > mRemainingAttempts)
730 : {
731 0 : mRemainingAttempts = attemptCount;
732 : }
733 :
734 0 : if (attemptCount > mResolveAttemptsAllowed)
735 : {
736 0 : mResolveAttemptsAllowed = attemptCount;
737 : }
738 : }
739 :
740 0 : CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay)
741 : {
742 0 : VerifyOrDie(mRemainingAttempts > 0);
743 : // Try again, but not if things are in shutdown such that we can't get
744 : // to a system layer, and not if we've run out of attempts.
745 0 : if (!mInitParams.exchangeMgr->GetSessionManager() || !mInitParams.exchangeMgr->GetSessionManager()->SystemLayer())
746 : {
747 0 : return CHIP_ERROR_INCORRECT_STATE;
748 : }
749 :
750 0 : MoveToState(State::NeedsAddress);
751 : // Stop exponential backoff before our delays get too large.
752 : //
753 : // Note that mAttemptsDone is always > 0 here, because we have
754 : // just finished one attempt.
755 0 : VerifyOrDie(mAttemptsDone > 0);
756 : static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
757 : (1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
758 : "Our backoff calculation will overflow.");
759 0 : System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16(
760 0 : static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
761 0 : << std::min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
762 0 : const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero;
763 0 : if (responseWasBusy)
764 : {
765 0 : if (mRequestedBusyDelay > actualTimerDelay)
766 : {
767 0 : actualTimerDelay = mRequestedBusyDelay;
768 : }
769 :
770 : // Reset mRequestedBusyDelay now that we have consumed it, so it does
771 : // not affect future reattempts not triggered by a busy response.
772 0 : mRequestedBusyDelay = System::Clock::kZero;
773 : }
774 :
775 0 : if (mAttemptsDone % 2 == 0)
776 : {
777 : // It's possible that the other side received one of our Sigma1 messages
778 : // and then failed to get its Sigma2 back to us. If that's the case, it
779 : // will be waiting for that Sigma2 to time out before it starts
780 : // listening for Sigma1 messages again.
781 : //
782 : // To handle that, on every other retry, add the amount of time it would
783 : // take the other side to time out. It would be nice if we could rely
784 : // on the delay reported in a BUSY response to just tell us that value,
785 : // but in practice for old devices BUSY often sends some hardcoded value
786 : // that tells us nothing about when the other side will decide it has
787 : // timed out.
788 : //
789 : // Unfortunately, we do not have the MRP config for the other side here,
790 : // but in practice if the other side is using its local config to
791 : // compute Sigma2 response timeouts, then it's also returning useful
792 : // values with BUSY, so we will wait long enough.
793 0 : auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig()));
794 0 : actualTimerDelay += additionalTimeout;
795 : }
796 0 : timerDelay = std::chrono::duration_cast<System::Clock::Seconds16>(actualTimerDelay);
797 :
798 0 : CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this);
799 :
800 : // TODO: If responseWasBusy, should we increment, mRemainingAttempts and
801 : // mResolveAttemptsAllowed, since we were explicitly told to retry? Hard to
802 : // tell what consumers expect out of a capped retry count here.
803 :
804 : // The cast on count() is needed because the type count() returns might not
805 : // actually be uint16_t; on some platforms it's int.
806 0 : ChipLogProgress(Discovery,
807 : "OperationalSessionSetup:attempts done: %u, attempts left: %u, retry delay %us, status %" CHIP_ERROR_FORMAT,
808 : mAttemptsDone, mRemainingAttempts, static_cast<unsigned>(timerDelay.count()), err.Format());
809 0 : return err;
810 : }
811 :
812 0 : void OperationalSessionSetup::CancelSessionSetupReattempt()
813 : {
814 : // If we can't get a system layer, there is no way for us to cancel things
815 : // at this point, but hopefully that's because everything is torn down
816 : // anyway and hence the timer will not fire.
817 0 : auto * sessionManager = mInitParams.exchangeMgr->GetSessionManager();
818 0 : VerifyOrReturn(sessionManager != nullptr);
819 :
820 0 : auto * systemLayer = sessionManager->SystemLayer();
821 0 : VerifyOrReturn(systemLayer != nullptr);
822 :
823 0 : systemLayer->CancelTimer(TrySetupAgain, this);
824 : }
825 :
826 0 : void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state)
827 : {
828 0 : auto * self = static_cast<OperationalSessionSetup *>(state);
829 :
830 0 : self->MoveToState(State::ResolvingAddress);
831 0 : CHIP_ERROR err = self->LookupPeerAddress();
832 0 : if (err == CHIP_NO_ERROR)
833 : {
834 0 : return;
835 : }
836 :
837 : // Give up; we could not start a lookup.
838 0 : self->DequeueConnectionCallbacks(err);
839 : // Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
840 : }
841 :
842 0 : void OperationalSessionSetup::AddRetryHandler(Callback::Callback<OnDeviceConnectionRetry> * onRetry)
843 : {
844 0 : mConnectionRetry.Enqueue(onRetry->Cancel());
845 0 : }
846 :
847 0 : void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig,
848 : System::Clock::Seconds16 retryDelay)
849 : {
850 : // Compute the time we are likely to need to detect that the retry has
851 : // failed.
852 0 : System::Clock::Timeout messageTimeout = CASESession::ComputeSigma1ResponseTimeout(remoteMrpConfig);
853 0 : auto timeoutSecs = std::chrono::duration_cast<System::Clock::Seconds16>(messageTimeout);
854 : // Add 1 second in case we had fractional milliseconds in messageTimeout.
855 : using namespace chip::System::Clock::Literals;
856 0 : NotifyRetryHandlers(error, timeoutSecs + 1_s16 + retryDelay);
857 0 : }
858 :
859 0 : void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, System::Clock::Seconds16 timeoutEstimate)
860 : {
861 : // We have to be very careful here: Calling into these handlers might in
862 : // theory destroy the Callback objects involved, but unlike the
863 : // succcess/failure cases we don't want to just clear the handlers from our
864 : // list when we are calling them, because we might need to call a given
865 : // handler more than once.
866 : //
867 : // To handle this we:
868 : //
869 : // 1) Snapshot the list of handlers up front, so if any of the handlers
870 : // triggers an AddRetryHandler with some other handler that does not
871 : // affect the list we plan to notify here.
872 : //
873 : // 2) When planning to notify a handler move it to a new list that contains
874 : // just that handler. This way if it gets canceled as part of the
875 : // notification we can tell it has been canceled.
876 : //
877 : // 3) If notifying the handler does not cancel it, add it back to our list
878 : // of handlers so we will notify it on future retries.
879 :
880 0 : Cancelable retryHandlerListSnapshot;
881 0 : mConnectionRetry.DequeueAll(retryHandlerListSnapshot);
882 :
883 0 : while (retryHandlerListSnapshot.mNext != &retryHandlerListSnapshot)
884 : {
885 0 : auto * cb = Callback::Callback<OnDeviceConnectionRetry>::FromCancelable(retryHandlerListSnapshot.mNext);
886 :
887 0 : Callback::CallbackDeque currentCallbackHolder;
888 0 : currentCallbackHolder.Enqueue(cb->Cancel());
889 :
890 0 : cb->mCall(cb->mContext, mPeerId, error, timeoutEstimate);
891 :
892 0 : if (currentCallbackHolder.mNext != ¤tCallbackHolder)
893 : {
894 : // Callback has not been canceled as part of the call, so is still
895 : // supposed to be registered with us.
896 0 : AddRetryHandler(cb);
897 : }
898 0 : }
899 0 : }
900 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
901 :
902 : } // namespace chip
|