Line data Source code
1 : /*
2 : *
3 : * Copyright (c) 2020-2021 Project CHIP Authors
4 : * All rights reserved.
5 : *
6 : * Licensed under the Apache License, Version 2.0 (the "License");
7 : * you may not use this file except in compliance with the License.
8 : * You may obtain a copy of the License at
9 : *
10 : * http://www.apache.org/licenses/LICENSE-2.0
11 : *
12 : * Unless required by applicable law or agreed to in writing, software
13 : * distributed under the License is distributed on an "AS IS" BASIS,
14 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 : * See the License for the specific language governing permissions and
16 : * limitations under the License.
17 : */
18 :
19 : /**
20 : * @file
21 : * This file contains the implementation of the OperationalSessionSetup
22 : * class. The objects of this class will be used by Controller applications
23 : * to interact with CHIP devices. The class provides a mechanism to construct,
24 : * send and receive messages to and from the corresponding CHIP devices.
25 : */
26 :
27 : #include <app/OperationalSessionSetup.h>
28 :
29 : #include <app/CASEClient.h>
30 : #include <app/InteractionModelEngine.h>
31 : #include <transport/SecureSession.h>
32 :
33 : #include <lib/address_resolve/AddressResolve.h>
34 : #include <lib/core/CHIPCore.h>
35 : #include <lib/core/CHIPEncoding.h>
36 : #include <lib/dnssd/Resolver.h>
37 : #include <lib/support/CodeUtils.h>
38 : #include <lib/support/logging/CHIPLogging.h>
39 : #include <system/SystemClock.h>
40 : #include <system/SystemLayer.h>
41 : #include <tracing/metric_event.h>
42 :
43 : using namespace chip::Callback;
44 : using chip::AddressResolve::NodeLookupRequest;
45 : using chip::AddressResolve::Resolver;
46 : using chip::AddressResolve::ResolveResult;
47 : using namespace chip::Tracing;
48 :
49 : namespace chip {
50 :
51 0 : void OperationalSessionSetup::MoveToState(State aTargetState)
52 : {
53 0 : if (mState != aTargetState)
54 : {
55 0 : ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d",
56 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState),
57 : to_underlying(aTargetState));
58 :
59 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
60 0 : if (mState == State::WaitingForRetry)
61 : {
62 0 : CancelSessionSetupReattempt();
63 : }
64 : #endif
65 :
66 0 : mState = aTargetState;
67 :
68 0 : if (aTargetState != State::Connecting)
69 : {
70 0 : CleanupCASEClient();
71 : }
72 : }
73 0 : }
74 :
75 0 : bool OperationalSessionSetup::AttachToExistingSecureSession()
76 : {
77 0 : VerifyOrReturnError(mState == State::NeedsAddress || mState == State::ResolvingAddress || mState == State::HasAddress ||
78 : mState == State::WaitingForRetry,
79 : false);
80 :
81 0 : auto sessionHandle = mInitParams.sessionManager->FindSecureSessionForNode(
82 0 : mPeerId, MakeOptional(Transport::SecureSession::Type::kCASE), mTransportPayloadCapability);
83 0 : if (!sessionHandle.HasValue())
84 0 : return false;
85 :
86 0 : ChipLogProgress(Discovery, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(),
87 : ChipLogValueX64(mPeerId.GetNodeId()));
88 :
89 0 : mDeviceAddress = sessionHandle.Value()->AsSecureSession()->GetPeerAddress();
90 0 : if (!mSecureSession.Grab(sessionHandle.Value()))
91 0 : return false;
92 :
93 0 : return true;
94 0 : }
95 :
96 0 : void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
97 : Callback::Callback<OnDeviceConnectionFailure> * onFailure,
98 : Callback::Callback<OnSetupFailure> * onSetupFailure,
99 : TransportPayloadCapability transportPayloadCapability)
100 : {
101 0 : CHIP_ERROR err = CHIP_NO_ERROR;
102 0 : bool isConnected = false;
103 :
104 0 : mTransportPayloadCapability = transportPayloadCapability;
105 : //
106 : // Always enqueue our user provided callbacks into our callback list.
107 : // If anything goes wrong below, we'll trigger failures (including any queued from
108 : // a previous iteration which in theory shouldn't happen, but this is written to be more defensive)
109 : //
110 0 : EnqueueConnectionCallbacks(onConnection, onFailure, onSetupFailure);
111 :
112 0 : switch (mState)
113 : {
114 0 : case State::Uninitialized:
115 0 : err = CHIP_ERROR_INCORRECT_STATE;
116 0 : break;
117 :
118 0 : case State::NeedsAddress:
119 0 : isConnected = AttachToExistingSecureSession();
120 0 : if (!isConnected)
121 : {
122 : // LookupPeerAddress could perhaps call back with a result
123 : // synchronously, so do our state update first.
124 0 : MoveToState(State::ResolvingAddress);
125 0 : err = LookupPeerAddress();
126 0 : if (err != CHIP_NO_ERROR)
127 : {
128 : // Roll back the state change, since we are presumably not in
129 : // the middle of a lookup.
130 0 : MoveToState(State::NeedsAddress);
131 : }
132 : }
133 :
134 0 : break;
135 :
136 0 : case State::ResolvingAddress:
137 : case State::WaitingForRetry:
138 0 : isConnected = AttachToExistingSecureSession();
139 0 : break;
140 :
141 0 : case State::HasAddress:
142 0 : isConnected = AttachToExistingSecureSession();
143 0 : if (!isConnected)
144 : {
145 : // We should not actually every be in be in State::HasAddress. This
146 : // is because in the same call that we moved to State::HasAddress
147 : // we either move to State::Connecting or call
148 : // DequeueConnectionCallbacks with an error thus releasing
149 : // ourselves before any call would reach this section of code.
150 0 : err = CHIP_ERROR_INCORRECT_STATE;
151 : }
152 :
153 0 : break;
154 :
155 0 : case State::Connecting:
156 0 : break;
157 :
158 0 : case State::SecureConnected:
159 0 : isConnected = true;
160 0 : break;
161 :
162 0 : default:
163 0 : err = CHIP_ERROR_INCORRECT_STATE;
164 : }
165 :
166 0 : if (isConnected)
167 : {
168 0 : MoveToState(State::SecureConnected);
169 : }
170 :
171 : //
172 : // Dequeue all our callbacks on either encountering an error
173 : // or if we successfully connected. Both should not be set
174 : // simultaneously.
175 : //
176 0 : if (err != CHIP_NO_ERROR || isConnected)
177 : {
178 0 : DequeueConnectionCallbacks(err);
179 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
180 : // While it is odd to have an explicit return here at the end of the function, we do so
181 : // as a precaution in case someone later on adds something to the end of this function.
182 0 : return;
183 : }
184 : }
185 :
186 0 : void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
187 : Callback::Callback<OnDeviceConnectionFailure> * onFailure,
188 : TransportPayloadCapability transportPayloadCapability)
189 : {
190 0 : Connect(onConnection, onFailure, nullptr, transportPayloadCapability);
191 0 : }
192 :
193 0 : void OperationalSessionSetup::Connect(Callback::Callback<OnDeviceConnected> * onConnection,
194 : Callback::Callback<OnSetupFailure> * onSetupFailure,
195 : TransportPayloadCapability transportPayloadCapability)
196 : {
197 0 : Connect(onConnection, nullptr, onSetupFailure, transportPayloadCapability);
198 0 : }
199 :
200 0 : void OperationalSessionSetup::UpdateDeviceData(const ResolveResult & result)
201 : {
202 0 : auto & config = result.mrpRemoteConfig;
203 0 : auto addr = result.address;
204 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
205 : // Make sure to clear out our reason for trying the next result first thing,
206 : // so it does not stick around in various error cases.
207 0 : bool tryingNextResultDueToSessionEstablishmentError = mTryingNextResultDueToSessionEstablishmentError;
208 0 : mTryingNextResultDueToSessionEstablishmentError = false;
209 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
210 :
211 0 : if (mState == State::Uninitialized)
212 : {
213 0 : return;
214 : }
215 :
216 : #if CHIP_DETAIL_LOGGING
217 : char peerAddrBuff[Transport::PeerAddress::kMaxToStringSize];
218 0 : addr.ToString(peerAddrBuff);
219 :
220 0 : ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: Updating device address to %s while in state %d",
221 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), peerAddrBuff, static_cast<int>(mState));
222 : #endif
223 :
224 0 : mDeviceAddress = addr;
225 :
226 : // Initialize CASE session state with any MRP parameters that DNS-SD has provided.
227 : // It can be overridden by CASE session protocol messages that include MRP parameters.
228 0 : if (mCASEClient)
229 : {
230 0 : mCASEClient->SetRemoteMRPIntervals(config);
231 : }
232 :
233 0 : if (mState != State::ResolvingAddress)
234 : {
235 0 : ChipLogError(Discovery, "Received UpdateDeviceData in incorrect state");
236 0 : DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
237 : // Do not touch `this` instance anymore; it has been destroyed in
238 : // DequeueConnectionCallbacks.
239 0 : return;
240 : }
241 :
242 0 : MoveToState(State::HasAddress);
243 0 : mInitParams.sessionManager->UpdateAllSessionsPeerAddress(mPeerId, addr);
244 :
245 0 : if (mPerformingAddressUpdate)
246 : {
247 : // Nothing else to do here.
248 0 : DequeueConnectionCallbacks(CHIP_NO_ERROR);
249 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
250 0 : return;
251 : }
252 :
253 0 : CHIP_ERROR err = EstablishConnection(result);
254 0 : LogErrorOnFailure(err);
255 0 : if (err == CHIP_NO_ERROR)
256 : {
257 : // We expect to get a callback via OnSessionEstablished or OnSessionEstablishmentError to continue
258 : // the state machine forward.
259 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
260 0 : if (tryingNextResultDueToSessionEstablishmentError)
261 : {
262 : // Our retry has already been kicked off, so claim 0 delay until it
263 : // starts. We only reach this from OnSessionEstablishmentError when
264 : // the error is CHIP_ERROR_TIMEOUT.
265 0 : NotifyRetryHandlers(CHIP_ERROR_TIMEOUT, config, System::Clock::kZero);
266 : }
267 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
268 0 : return;
269 : }
270 :
271 : // Move to the ResolvingAddress state, in case we have more results,
272 : // since we expect to receive results in that state. Pretend like we moved
273 : // on directly to this address from whatever triggered us to try this result
274 : // (so restore mTryingNextResultDueToSessionEstablishmentError to the value
275 : // it had at the start of this function).
276 0 : MoveToState(State::ResolvingAddress);
277 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
278 0 : mTryingNextResultDueToSessionEstablishmentError = tryingNextResultDueToSessionEstablishmentError;
279 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
280 0 : if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle))
281 : {
282 : // No need to NotifyRetryHandlers, since we never actually spent any
283 : // time trying the previous result. Whatever work we need to do has
284 : // been handled by our recursive OnNodeAddressResolved callback. Make
285 : // sure not to touch `this` under here, because it might have been
286 : // deleted by OnNodeAddressResolved.
287 0 : return;
288 : }
289 :
290 : // No need to reset mTryingNextResultDueToSessionEstablishmentError here,
291 : // because we're about to delete ourselves.
292 :
293 0 : DequeueConnectionCallbacks(err);
294 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
295 : }
296 :
297 0 : CHIP_ERROR OperationalSessionSetup::EstablishConnection(const ResolveResult & result)
298 : {
299 0 : auto & config = result.mrpRemoteConfig;
300 : #if INET_CONFIG_ENABLE_TCP_ENDPOINT
301 0 : if (mTransportPayloadCapability == TransportPayloadCapability::kLargePayload)
302 : {
303 0 : if (result.supportsTcpServer)
304 : {
305 : // Set the transport type for carrying large payloads
306 0 : mDeviceAddress.SetTransportType(chip::Transport::Type::kTcp);
307 : }
308 : else
309 : {
310 : // we should not set the large payload while the TCP support is not enabled
311 0 : ChipLogError(
312 : Discovery,
313 : "LargePayload session requested but peer does not support TCP server, PeerNodeId=" ChipLogFormatScopedNodeId,
314 : ChipLogValueScopedNodeId(mPeerId));
315 0 : return CHIP_ERROR_INTERNAL;
316 : }
317 : }
318 : #endif
319 :
320 0 : mCASEClient = mClientPool->Allocate();
321 0 : VerifyOrReturnError(mCASEClient != nullptr, CHIP_ERROR_NO_MEMORY);
322 :
323 : MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession);
324 0 : CHIP_ERROR err = mCASEClient->EstablishSession(mInitParams, mPeerId, mDeviceAddress, config, this);
325 0 : if (err != CHIP_NO_ERROR)
326 : {
327 : MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
328 0 : CleanupCASEClient();
329 0 : return err;
330 : }
331 :
332 0 : MoveToState(State::Connecting);
333 :
334 0 : return CHIP_NO_ERROR;
335 : }
336 :
337 0 : void OperationalSessionSetup::EnqueueConnectionCallbacks(Callback::Callback<OnDeviceConnected> * onConnection,
338 : Callback::Callback<OnDeviceConnectionFailure> * onFailure,
339 : Callback::Callback<OnSetupFailure> * onSetupFailure)
340 : {
341 0 : mCallbacks.Enqueue(onConnection, onFailure, onSetupFailure);
342 0 : }
343 :
344 0 : void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error, SessionEstablishmentStage stage,
345 : ReleaseBehavior releaseBehavior)
346 : {
347 : // We expect that we only have callbacks if we are not performing just address update.
348 0 : VerifyOrDie(!mPerformingAddressUpdate || mCallbacks.IsEmpty());
349 :
350 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
351 : // Clear out mConnectionRetry, so that those cancelables are not holding
352 : // pointers to us, since we're about to go away.
353 0 : while (auto * cb = mConnectionRetry.First())
354 : {
355 0 : cb->Cancel();
356 0 : }
357 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
358 :
359 : // Gather up state we will need for our notifications.
360 0 : SuccessFailureCallbackList readyCallbacks;
361 0 : readyCallbacks.EnqueueTakeAll(mCallbacks);
362 0 : auto * exchangeMgr = mInitParams.exchangeMgr;
363 0 : Optional<SessionHandle> optionalSessionHandle = mSecureSession.Get();
364 0 : ScopedNodeId peerId = mPeerId;
365 0 : System::Clock::Milliseconds16 requestedBusyDelay =
366 : #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
367 : mRequestedBusyDelay;
368 : #else
369 : System::Clock::kZero;
370 : #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
371 :
372 0 : if (releaseBehavior == ReleaseBehavior::Release)
373 : {
374 0 : VerifyOrDie(mReleaseDelegate != nullptr);
375 0 : mReleaseDelegate->ReleaseSession(this);
376 : }
377 :
378 : // DO NOT touch any members of this object after this point. It's dead.
379 0 : NotifyConnectionCallbacks(readyCallbacks, error, stage, peerId, exchangeMgr, optionalSessionHandle, requestedBusyDelay);
380 0 : }
381 :
382 0 : void OperationalSessionSetup::NotifyConnectionCallbacks(SuccessFailureCallbackList & ready, CHIP_ERROR error,
383 : SessionEstablishmentStage stage, const ScopedNodeId & peerId,
384 : Messaging::ExchangeManager * exchangeMgr,
385 : const Optional<SessionHandle> & optionalSessionHandle,
386 : System::Clock::Milliseconds16 requestedBusyDelay)
387 : {
388 : Callback::Callback<OnDeviceConnected> * onConnected;
389 : Callback::Callback<OnDeviceConnectionFailure> * onConnectionFailure;
390 : Callback::Callback<OnSetupFailure> * onSetupFailure;
391 0 : while (ready.Take(onConnected, onConnectionFailure, onSetupFailure))
392 : {
393 0 : if (error == CHIP_NO_ERROR)
394 : {
395 0 : VerifyOrDie(exchangeMgr);
396 0 : VerifyOrDie(optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession());
397 0 : if (onConnected != nullptr)
398 : {
399 0 : onConnected->mCall(onConnected->mContext, *exchangeMgr, optionalSessionHandle.Value());
400 :
401 : // That sucessful call might have made the session inactive. If it did, then we should
402 : // not call any more success callbacks, since we do not in fact have an active session
403 : // for them, and if they try to put the session in a holder that will fail, and then
404 : // trying to use the holder as if it has a session will crash.
405 0 : if (!optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession())
406 : {
407 0 : ChipLogError(Discovery, "Success callback for connection to " ChipLogFormatScopedNodeId " tore down session",
408 : ChipLogValueScopedNodeId(peerId));
409 0 : error = CHIP_ERROR_CONNECTION_ABORTED;
410 : }
411 : }
412 : }
413 : else // error
414 : {
415 0 : if (onConnectionFailure != nullptr)
416 : {
417 0 : onConnectionFailure->mCall(onConnectionFailure->mContext, peerId, error);
418 : }
419 0 : if (onSetupFailure != nullptr)
420 : {
421 0 : ConnectionFailureInfo failureInfo(peerId, error, stage);
422 : #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
423 0 : if (error == CHIP_ERROR_BUSY)
424 : {
425 0 : failureInfo.requestedBusyDelay.Emplace(requestedBusyDelay);
426 : }
427 : #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
428 0 : onSetupFailure->mCall(onSetupFailure->mContext, failureInfo);
429 0 : }
430 : }
431 : }
432 0 : }
433 :
434 0 : void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage)
435 : {
436 0 : VerifyOrReturn(mState == State::Connecting,
437 : ChipLogError(Discovery, "OnSessionEstablishmentError was called while we were not connecting"));
438 :
439 : // If this condition ever changes, we may need to store the error in a
440 : // member instead of having a boolean
441 : // mTryingNextResultDueToSessionEstablishmentError, so we can recover the
442 : // error in UpdateDeviceData.
443 0 : if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error)
444 : {
445 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
446 : // Make a copy of the ReliableMessageProtocolConfig, since our
447 : // mCaseClient is about to go away once we change state.
448 0 : ReliableMessageProtocolConfig remoteMprConfig = mCASEClient->GetRemoteMRPIntervals();
449 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
450 :
451 : // Move to the ResolvingAddress state, in case we have more results,
452 : // since we expect to receive results in that state.
453 0 : MoveToState(State::ResolvingAddress);
454 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
455 0 : mTryingNextResultDueToSessionEstablishmentError = true;
456 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
457 0 : if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle))
458 : {
459 : // Whatever work we needed to do has been handled by our
460 : // OnNodeAddressResolved callback. Make sure not to touch `this`
461 : // under here, because it might have been deleted by
462 : // OnNodeAddressResolved.
463 0 : return;
464 : }
465 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
466 0 : mTryingNextResultDueToSessionEstablishmentError = false;
467 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
468 :
469 : // Moving back to the Connecting state would be a bit of a lie, since we
470 : // don't have an mCASEClient. Just go back to NeedsAddress, since
471 : // that's really where we are now.
472 0 : MoveToState(State::NeedsAddress);
473 :
474 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
475 0 : if (mRemainingAttempts > 0)
476 : {
477 : System::Clock::Seconds16 reattemptDelay;
478 0 : CHIP_ERROR err = ScheduleSessionSetupReattempt(reattemptDelay);
479 0 : if (err == CHIP_NO_ERROR)
480 : {
481 0 : MoveToState(State::WaitingForRetry);
482 0 : NotifyRetryHandlers(error, remoteMprConfig, reattemptDelay);
483 0 : return;
484 : }
485 : }
486 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
487 : }
488 :
489 : // Session failed to be established. This is when discovery is also stopped
490 : MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, error);
491 : MATTER_LOG_METRIC_END(kMetricDeviceCASESession, error);
492 :
493 0 : DequeueConnectionCallbacks(error, stage);
494 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
495 : }
496 :
497 0 : void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay)
498 : {
499 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES || CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
500 : // Store the requested delay, so that we can use it for scheduling our
501 : // retry or communicate it to our API consumer.
502 0 : mRequestedBusyDelay = requestedDelay;
503 : #endif
504 0 : }
505 :
506 0 : void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
507 : {
508 0 : VerifyOrReturn(mState == State::Connecting,
509 : ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting"));
510 :
511 : // Session has been established. This is when discovery is also stopped
512 : MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, CHIP_NO_ERROR);
513 :
514 : MATTER_LOG_METRIC_END(kMetricDeviceCASESession, CHIP_NO_ERROR);
515 :
516 0 : if (!mSecureSession.Grab(session))
517 : {
518 : // Got an invalid session, just dispatch an error. We have to do this
519 : // so we don't leak.
520 0 : DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE);
521 :
522 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
523 0 : return;
524 : }
525 :
526 0 : MoveToState(State::SecureConnected);
527 :
528 0 : DequeueConnectionCallbacks(CHIP_NO_ERROR);
529 : }
530 :
531 0 : void OperationalSessionSetup::CleanupCASEClient()
532 : {
533 0 : if (mCASEClient)
534 : {
535 0 : mClientPool->Release(mCASEClient);
536 0 : mCASEClient = nullptr;
537 : }
538 0 : }
539 :
540 0 : OperationalSessionSetup::~OperationalSessionSetup()
541 : {
542 0 : if (mAddressLookupHandle.IsActive())
543 : {
544 0 : ChipLogDetail(Discovery,
545 : "OperationalSessionSetup[%u:" ChipLogFormatX64
546 : "]: Cancelling incomplete address resolution as device is being deleted.",
547 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()));
548 :
549 : // Skip cancel callback since the destructor is being called, so we assume that this object is
550 : // obviously not used anymore
551 0 : CHIP_ERROR err = Resolver::Instance().CancelLookup(mAddressLookupHandle, Resolver::FailureCallback::Skip);
552 0 : if (err != CHIP_NO_ERROR)
553 : {
554 0 : ChipLogError(Discovery, "Lookup cancel failed: %" CHIP_ERROR_FORMAT, err.Format());
555 : }
556 : }
557 :
558 0 : if (mCASEClient)
559 : {
560 : // Make sure we don't leak it.
561 0 : mClientPool->Release(mCASEClient);
562 : }
563 :
564 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
565 0 : CancelSessionSetupReattempt();
566 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
567 :
568 0 : DequeueConnectionCallbacks(CHIP_ERROR_CANCELLED, ReleaseBehavior::DoNotRelease);
569 0 : }
570 :
571 0 : CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
572 : {
573 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
574 0 : if (mRemainingAttempts > 0)
575 : {
576 0 : --mRemainingAttempts;
577 : }
578 0 : if (mAttemptsDone < UINT8_MAX)
579 : {
580 0 : ++mAttemptsDone;
581 : }
582 0 : if (mResolveAttemptsAllowed > 0)
583 : {
584 0 : --mResolveAttemptsAllowed;
585 : }
586 : MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
587 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
588 :
589 : // NOTE: This is public API that can be used to update our stored peer
590 : // address even when we are in State::Connected, so we do not make any
591 : // MoveToState calls in this method.
592 0 : if (mAddressLookupHandle.IsActive())
593 : {
594 0 : ChipLogProgress(Discovery,
595 : "OperationalSessionSetup[%u:" ChipLogFormatX64
596 : "]: Operational node lookup already in progress. Will NOT start a new one.",
597 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()));
598 0 : return CHIP_NO_ERROR;
599 : }
600 :
601 : // This code can be reached multiple times, if we discover multiple addresses or do retries.
602 : // The metric backend can handle this and always picks the earliest occurrence as the start of the event.
603 : MATTER_LOG_METRIC_BEGIN(kMetricDeviceOperationalDiscovery);
604 :
605 0 : auto const * fabricInfo = mInitParams.fabricTable->FindFabricWithIndex(mPeerId.GetFabricIndex());
606 0 : VerifyOrReturnError(fabricInfo != nullptr, CHIP_ERROR_INVALID_FABRIC_INDEX);
607 :
608 0 : PeerId peerId(fabricInfo->GetCompressedFabricId(), mPeerId.GetNodeId());
609 :
610 0 : NodeLookupRequest request(peerId);
611 :
612 0 : return Resolver::Instance().LookupNode(request, mAddressLookupHandle);
613 : }
614 :
615 0 : void OperationalSessionSetup::PerformAddressUpdate()
616 : {
617 0 : if (mPerformingAddressUpdate)
618 : {
619 : // We are already in the middle of a lookup from a previous call to
620 : // PerformAddressUpdate. In that case we will just exit right away as
621 : // we are already looking to update the results from the previous lookup.
622 0 : return;
623 : }
624 :
625 : // We must be newly-allocated to handle this address lookup, so must be in the NeedsAddress state.
626 0 : VerifyOrDie(mState == State::NeedsAddress);
627 :
628 : // We are doing an address lookup whether we have an active session for this peer or not.
629 0 : mPerformingAddressUpdate = true;
630 0 : MoveToState(State::ResolvingAddress);
631 0 : CHIP_ERROR err = LookupPeerAddress();
632 0 : if (err != CHIP_NO_ERROR)
633 : {
634 0 : ChipLogError(Discovery, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format());
635 0 : DequeueConnectionCallbacks(err);
636 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
637 0 : return;
638 : }
639 : }
640 :
641 0 : void OperationalSessionSetup::OnNodeAddressResolved(const PeerId & peerId, const ResolveResult & result)
642 : {
643 0 : UpdateDeviceData(result);
644 0 : }
645 :
646 0 : void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerId, CHIP_ERROR reason)
647 : {
648 0 : ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT,
649 : mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format());
650 :
651 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
652 : // If we're in a mode where we would generally retry CASE, retry operational
653 : // discovery if we're allowed to. That allows us to more-gracefully handle broken networks
654 : // where multicast DNS does not actually work and hence only the initial
655 : // unicast DNS-SD queries get a response.
656 : //
657 : // We check for State::ResolvingAddress just in case in the meantime
658 : // something weird happened and we are no longer trying to resolve an
659 : // address.
660 0 : if (mState == State::ResolvingAddress && mResolveAttemptsAllowed > 0)
661 : {
662 0 : ChipLogProgress(Discovery, "Retrying operational DNS-SD discovery. Attempts remaining: %u", mResolveAttemptsAllowed);
663 :
664 : // Pretend like our previous attempt (i.e. call to LookupPeerAddress)
665 : // has not happened for purposes of the generic attempt counters, so we
666 : // don't mess up the counters for our actual CASE retry logic.
667 0 : if (mRemainingAttempts < UINT8_MAX)
668 : {
669 0 : ++mRemainingAttempts;
670 : }
671 0 : if (mAttemptsDone > 0)
672 : {
673 0 : --mAttemptsDone;
674 : }
675 :
676 : MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
677 :
678 0 : CHIP_ERROR err = LookupPeerAddress();
679 0 : if (err == CHIP_NO_ERROR)
680 : {
681 : // We need to notify our consumer that the resolve will take more
682 : // time, but we don't actually know how much time it will take,
683 : // because the resolver does not expose that information. Just use
684 : // one minute to be safe.
685 : using namespace chip::System::Clock::Literals;
686 0 : NotifyRetryHandlers(reason, 60_s16);
687 0 : return;
688 : }
689 : }
690 : #endif
691 :
692 : MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, reason);
693 :
694 : // No need to modify any variables in `this` since call below releases `this`.
695 0 : DequeueConnectionCallbacks(reason);
696 : // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
697 : }
698 :
699 : #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
700 0 : void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount)
701 : {
702 0 : if (attemptCount == 0)
703 : {
704 : // Nothing to do.
705 0 : return;
706 : }
707 :
708 0 : if (mState != State::NeedsAddress)
709 : {
710 : // We're in the middle of an attempt already, so decrement attemptCount
711 : // by 1 to account for that.
712 0 : --attemptCount;
713 : }
714 :
715 0 : if (attemptCount > mRemainingAttempts)
716 : {
717 0 : mRemainingAttempts = attemptCount;
718 : }
719 :
720 0 : if (attemptCount > mResolveAttemptsAllowed)
721 : {
722 0 : mResolveAttemptsAllowed = attemptCount;
723 : }
724 : }
725 :
726 0 : CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay)
727 : {
728 0 : VerifyOrDie(mRemainingAttempts > 0);
729 : // Try again, but not if things are in shutdown such that we can't get
730 : // to a system layer, and not if we've run out of attempts.
731 0 : if (!mInitParams.exchangeMgr->GetSessionManager() || !mInitParams.exchangeMgr->GetSessionManager()->SystemLayer())
732 : {
733 0 : return CHIP_ERROR_INCORRECT_STATE;
734 : }
735 :
736 0 : MoveToState(State::NeedsAddress);
737 : // Stop exponential backoff before our delays get too large.
738 : //
739 : // Note that mAttemptsDone is always > 0 here, because we have
740 : // just finished one attempt.
741 0 : VerifyOrDie(mAttemptsDone > 0);
742 : static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
743 : (1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
744 : "Our backoff calculation will overflow.");
745 0 : System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16(
746 0 : static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
747 0 : << std::min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
748 0 : const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero;
749 0 : if (responseWasBusy)
750 : {
751 0 : if (mRequestedBusyDelay > actualTimerDelay)
752 : {
753 0 : actualTimerDelay = mRequestedBusyDelay;
754 : }
755 :
756 : // Reset mRequestedBusyDelay now that we have consumed it, so it does
757 : // not affect future reattempts not triggered by a busy response.
758 0 : mRequestedBusyDelay = System::Clock::kZero;
759 : }
760 :
761 0 : if (mAttemptsDone % 2 == 0)
762 : {
763 : // It's possible that the other side received one of our Sigma1 messages
764 : // and then failed to get its Sigma2 back to us. If that's the case, it
765 : // will be waiting for that Sigma2 to time out before it starts
766 : // listening for Sigma1 messages again.
767 : //
768 : // To handle that, on every other retry, add the amount of time it would
769 : // take the other side to time out. It would be nice if we could rely
770 : // on the delay reported in a BUSY response to just tell us that value,
771 : // but in practice for old devices BUSY often sends some hardcoded value
772 : // that tells us nothing about when the other side will decide it has
773 : // timed out.
774 : //
775 : // Unfortunately, we do not have the MRP config for the other side here,
776 : // but in practice if the other side is using its local config to
777 : // compute Sigma2 response timeouts, then it's also returning useful
778 : // values with BUSY, so we will wait long enough.
779 0 : auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig()));
780 0 : actualTimerDelay += additionalTimeout;
781 : }
782 0 : timerDelay = std::chrono::duration_cast<System::Clock::Seconds16>(actualTimerDelay);
783 :
784 0 : CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this);
785 :
786 : // TODO: If responseWasBusy, should we increment, mRemainingAttempts and
787 : // mResolveAttemptsAllowed, since we were explicitly told to retry? Hard to
788 : // tell what consumers expect out of a capped retry count here.
789 :
790 : // The cast on count() is needed because the type count() returns might not
791 : // actually be uint16_t; on some platforms it's int.
792 0 : ChipLogProgress(Discovery,
793 : "OperationalSessionSetup:attempts done: %u, attempts left: %u, retry delay %us, status %" CHIP_ERROR_FORMAT,
794 : mAttemptsDone, mRemainingAttempts, static_cast<unsigned>(timerDelay.count()), err.Format());
795 0 : return err;
796 : }
797 :
798 0 : void OperationalSessionSetup::CancelSessionSetupReattempt()
799 : {
800 : // If we can't get a system layer, there is no way for us to cancel things
801 : // at this point, but hopefully that's because everything is torn down
802 : // anyway and hence the timer will not fire.
803 0 : auto * sessionManager = mInitParams.exchangeMgr->GetSessionManager();
804 0 : VerifyOrReturn(sessionManager != nullptr);
805 :
806 0 : auto * systemLayer = sessionManager->SystemLayer();
807 0 : VerifyOrReturn(systemLayer != nullptr);
808 :
809 0 : systemLayer->CancelTimer(TrySetupAgain, this);
810 : }
811 :
812 0 : void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state)
813 : {
814 0 : auto * self = static_cast<OperationalSessionSetup *>(state);
815 :
816 0 : self->MoveToState(State::ResolvingAddress);
817 0 : CHIP_ERROR err = self->LookupPeerAddress();
818 0 : if (err == CHIP_NO_ERROR)
819 : {
820 0 : return;
821 : }
822 :
823 : // Give up; we could not start a lookup.
824 0 : self->DequeueConnectionCallbacks(err);
825 : // Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
826 : }
827 :
828 0 : void OperationalSessionSetup::AddRetryHandler(Callback::Callback<OnDeviceConnectionRetry> * onRetry)
829 : {
830 0 : mConnectionRetry.Enqueue(onRetry->Cancel());
831 0 : }
832 :
833 0 : void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig,
834 : System::Clock::Seconds16 retryDelay)
835 : {
836 : // Compute the time we are likely to need to detect that the retry has
837 : // failed.
838 0 : System::Clock::Timeout messageTimeout = CASESession::ComputeSigma1ResponseTimeout(remoteMrpConfig);
839 0 : auto timeoutSecs = std::chrono::duration_cast<System::Clock::Seconds16>(messageTimeout);
840 : // Add 1 second in case we had fractional milliseconds in messageTimeout.
841 : using namespace chip::System::Clock::Literals;
842 0 : NotifyRetryHandlers(error, timeoutSecs + 1_s16 + retryDelay);
843 0 : }
844 :
845 0 : void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, System::Clock::Seconds16 timeoutEstimate)
846 : {
847 : // We have to be very careful here: Calling into these handlers might in
848 : // theory destroy the Callback objects involved, but unlike the
849 : // succcess/failure cases we don't want to just clear the handlers from our
850 : // list when we are calling them, because we might need to call a given
851 : // handler more than once.
852 : //
853 : // To handle this we:
854 : //
855 : // 1) Snapshot the list of handlers up front, so if any of the handlers
856 : // triggers an AddRetryHandler with some other handler that does not
857 : // affect the list we plan to notify here.
858 : //
859 : // 2) When planning to notify a handler move it to a new list that contains
860 : // just that handler. This way if it gets canceled as part of the
861 : // notification we can tell it has been canceled.
862 : //
863 : // 3) If notifying the handler does not cancel it, add it back to our list
864 : // of handlers so we will notify it on future retries.
865 :
866 0 : Cancelable retryHandlerListSnapshot;
867 0 : mConnectionRetry.DequeueAll(retryHandlerListSnapshot);
868 :
869 0 : while (retryHandlerListSnapshot.mNext != &retryHandlerListSnapshot)
870 : {
871 0 : auto * cb = Callback::Callback<OnDeviceConnectionRetry>::FromCancelable(retryHandlerListSnapshot.mNext);
872 :
873 0 : Callback::CallbackDeque currentCallbackHolder;
874 0 : currentCallbackHolder.Enqueue(cb->Cancel());
875 :
876 0 : cb->mCall(cb->mContext, mPeerId, error, timeoutEstimate);
877 :
878 0 : if (currentCallbackHolder.mNext != ¤tCallbackHolder)
879 : {
880 : // Callback has not been canceled as part of the call, so is still
881 : // supposed to be registered with us.
882 0 : AddRetryHandler(cb);
883 : }
884 0 : }
885 0 : }
886 : #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
887 :
888 : } // namespace chip
|