Matter SDK Coverage Report
Current view: top level - lib/support - utf8.cpp (source / functions) Coverage Total Hit
Test: SHA:209dc18e4021e7d0dff8120ccc585909391dd862 Lines: 93.4 % 61 57
Test Date: 2026-06-16 07:34:53 Functions: 100.0 % 2 2

            Line data    Source code
       1              : /*
       2              :  *
       3              :  *    Copyright (c) 2023 Project CHIP Authors
       4              :  *    All rights reserved.
       5              :  *
       6              :  *    Licensed under the Apache License, Version 2.0 (the "License");
       7              :  *    you may not use this file except in compliance with the License.
       8              :  *    You may obtain a copy of the License at
       9              :  *
      10              :  *        http://www.apache.org/licenses/LICENSE-2.0
      11              :  *
      12              :  *    Unless required by applicable law or agreed to in writing, software
      13              :  *    distributed under the License is distributed on an "AS IS" BASIS,
      14              :  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      15              :  *    See the License for the specific language governing permissions and
      16              :  *    limitations under the License.
      17              :  */
      18              : #include "utf8.h"
      19              : 
      20              : namespace chip {
      21              : namespace Utf8 {
      22              : 
      23              : namespace {
      24              : /**
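      25              :    State machine for validating UTF-8 byte sequences; the (A)-(D) tags in the table mark the restricted second-byte ranges handled by ParserState::kSecondByte_A .. kSecondByte_D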
      26              : 
      27              : Table 3-7. Well-Formed UTF-8 Byte Sequences
      28              : 
      29              : Code Points       | First B  | Second B   | Third B | Fourth B
      30              : ------------------+----------+------------+---------+---------
      31              : U+0000..U+007F    | 00..7F   |            |         |
      32              : U+0080..U+07FF    | C2..DF   | 80..BF     |         |
      33              : U+0800..U+0FFF    | E0       | A0..BF (A) | 80..BF  |
      34              : U+1000..U+CFFF    | E1..EC   | 80..BF     | 80..BF  |
      35              : U+D000..U+D7FF    | ED       | 80..9F (B) | 80..BF  |
      36              : U+E000..U+FFFF    | EE..EF   | 80..BF     | 80..BF  |
      37              : U+10000..U+3FFFF  | F0       | 90..BF (C) | 80..BF  | 80..BF
      38              : U+40000..U+FFFFF  | F1..F3   | 80..BF     | 80..BF  | 80..BF
      39              : U+100000..U+10FFFF| F4       | 80..8F (D) | 80..BF  | 80..BF
      40              : */
      41              : // States of the UTF-8 validator: each one names what the next input byte is allowed to be.
      42              : enum class ParserState
      43              : {
      44              :     kFirstByte,       // expecting the first byte of a sequence (start state, and the only accepting state)
      45              :     kSecondByte_A,    // after 0xE0: second byte must be 0xA0 .. 0xBF
      46              :     kSecondByte_B,    // after 0xED: second byte must be 0x80 .. 0x9F
      47              :     kSecondByte_C,    // after 0xF0: second byte must be 0x90 .. 0xBF
      48              :     kSecondByte_D,    // after 0xF4: second byte must be 0x80 .. 0x8F
      49              :     kExtraOneByte,    // 0x80 .. 0xBF  once
      50              :     kExtraTwoBytes,   // 0x80 .. 0xBF  twice
      51              :     kExtraThreeBytes, // 0x80 .. 0xBF  three times
      52              :     //
      53              :     kInvalid, // the byte just consumed was not allowed in the previous state
      54              : };
      55              : // Returns the state reached from `state` after consuming `value`, or kInvalid if `value` is not allowed there.
      56       139657 : ParserState NextState(ParserState state, uint8_t value)
      57              : {
      58       139657 :     switch (state)
      59              :     {
      60       139520 :     case ParserState::kFirstByte: // first byte decides how many bytes follow and any second-byte restriction
      61       139520 :         if (value <= 0x7F)
      62              :         {
      63       139402 :             return ParserState::kFirstByte; // one-byte sequence is already complete
      64              :         }
      65          118 :         if ((value >= 0xC2) && (value <= 0xDF))
      66              :         {
      67           20 :             return ParserState::kExtraOneByte; // two-byte sequence: one continuation byte left
      68              :         }
      69           98 :         if (value == 0xE0)
      70              :         {
      71            6 :             return ParserState::kSecondByte_A; // three-byte sequence with restricted second byte
      72              :         }
      73           92 :         if ((value >= 0xE1) && (value <= 0xEC))
      74              :         {
      75            4 :             return ParserState::kExtraTwoBytes; // three-byte sequence: two continuation bytes left
      76              :         }
      77           88 :         if (value == 0xED)
      78              :         {
      79            8 :             return ParserState::kSecondByte_B; // three-byte sequence with restricted second byte
      80              :         }
      81           80 :         if ((value >= 0xEE) && (value <= 0xEF))
      82              :         {
      83           37 :             return ParserState::kExtraTwoBytes; // three-byte sequence: two continuation bytes left
      84              :         }
      85           43 :         if (value == 0xF0)
      86              :         {
      87           10 :             return ParserState::kSecondByte_C; // four-byte sequence with restricted second byte
      88              :         }
      89           33 :         if ((value >= 0xF1) && (value <= 0xF3))
      90              :         {
      91            3 :             return ParserState::kExtraThreeBytes; // four-byte sequence: three continuation bytes left
      92              :         }
      93           30 :         if (value == 0xF4)
      94              :         {
      95            7 :             return ParserState::kSecondByte_D; // four-byte sequence with restricted second byte
      96              :         }
      97           23 :         return ParserState::kInvalid; // 0x80 .. 0xC1 and 0xF5 .. 0xFF cannot start a sequence
      98            5 :     case ParserState::kSecondByte_A:
      99            5 :         if (value >= 0xA0 && value <= 0xBF)
     100              :         {
     101            1 :             return ParserState::kExtraOneByte; // second of three bytes accepted, one left
     102              :         }
     103            4 :         return ParserState::kInvalid;
     104            8 :     case ParserState::kSecondByte_B:
     105            8 :         if (value >= 0x80 && value <= 0x9F)
     106              :         {
     107            3 :             return ParserState::kExtraOneByte; // second of three bytes accepted, one left
     108              :         }
     109            5 :         return ParserState::kInvalid;
     110            9 :     case ParserState::kSecondByte_C:
     111            9 :         if (value >= 0x90 && value <= 0xBF)
     112              :         {
     113            5 :             return ParserState::kExtraTwoBytes; // second of four bytes accepted, two left
     114              :         }
     115            4 :         return ParserState::kInvalid;
     116            6 :     case ParserState::kSecondByte_D:
     117            6 :         if (value >= 0x80 && value <= 0x8F)
     118              :         {
     119            1 :             return ParserState::kExtraTwoBytes; // second of four bytes accepted, two left
     120              :         }
     121            5 :         return ParserState::kInvalid;
     122           61 :     case ParserState::kExtraOneByte:
     123           61 :         if (value >= 0x80 && value <= 0xBF)
     124              :         {
     125           60 :             return ParserState::kFirstByte; // final continuation byte: sequence complete
     126              :         }
     127            1 :         return ParserState::kInvalid;
     128           46 :     case ParserState::kExtraTwoBytes:
     129           46 :         if (value >= 0x80 && value <= 0xBF)
     130              :         {
     131           46 :             return ParserState::kExtraOneByte; // one continuation byte left
     132              :         }
     133            0 :         return ParserState::kInvalid;
     134            2 :     case ParserState::kExtraThreeBytes:
     135            2 :         if (value >= 0x80 && value <= 0xBF)
     136              :         {
     137            2 :             return ParserState::kExtraTwoBytes; // two continuation bytes left
     138              :         }
     139            0 :         return ParserState::kInvalid;
     140            0 :     default: // reached for kInvalid, the only enumerator without its own case
     141            0 :         return ParserState::kInvalid;
     142              :     }
     143              : }
     144              : 
     145              : } // namespace
     146              : // Returns true when every byte of `span` is accepted and no sequence is left unfinished; a span of size 0 yields true.
     147         5367 : bool IsValid(CharSpan span)
     148              : {
     149         5367 :     ParserState state = ParserState::kFirstByte;
     150              : 
     151         5367 :     const char * data    = span.data();
     152         5367 :     const size_t kLength = span.size();
     153              : 
     154              :     // Feed each byte through the state machine and stop at the first one that is rejected
     155       144982 :     for (size_t i = 0; i < kLength; i++)
     156              :     {
     157       139657 :         state = NextState(state, static_cast<uint8_t>(data[i])); // cast: NextState compares against unsigned byte values
     158              : 
     159       139657 :         if (state == ParserState::kInvalid)
     160              :         {
     161           42 :             return false;
     162              :         }
     163              :     }
     164              : 
     165              :     // Input that ends in the middle of a multi-byte sequence leaves a state other than kFirstByte and is rejected
     166         5325 :     return state == ParserState::kFirstByte;
     167              : }
     168              : 
     169              : } // namespace Utf8
     170              : } // namespace chip
        

Generated by: LCOV version 2.0-1