From 0af4e6587eb65993a939d773b558feb9ac52006a Mon Sep 17 00:00:00 2001 From: k1ngsterr1 Date: Thu, 26 Mar 2026 21:27:17 +0500 Subject: [PATCH] android: expand native e2ee diagnostics --- app/src/main/cpp/rosetta_e2ee.cpp | 427 +++++++++++++++++++++++++++--- 1 file changed, 390 insertions(+), 37 deletions(-) diff --git a/app/src/main/cpp/rosetta_e2ee.cpp b/app/src/main/cpp/rosetta_e2ee.cpp index ed7fc4e..320876b 100644 --- a/app/src/main/cpp/rosetta_e2ee.cpp +++ b/app/src/main/cpp/rosetta_e2ee.cpp @@ -34,6 +34,10 @@ static char g_diag_path[512] = {0}; static int g_diag_fd = -1; +static std::atomic g_diag_event_count{0}; +static constexpr int kDiagEventLimit = 4000; +static constexpr int kDiagFrameLimit = 400; +static constexpr size_t kFrameHashSampleBytes = 320; static void diag_write(const char *fmt, ...) { if (g_diag_fd < 0) return; @@ -45,6 +49,50 @@ static void diag_write(const char *fmt, ...) { if (n > 0) write(g_diag_fd, buf, n); } +static void diag_event(const char *fmt, ...) { + if (g_diag_fd < 0) return; + const int idx = g_diag_event_count.fetch_add(1, std::memory_order_relaxed); + if (idx >= kDiagEventLimit) return; + char buf[640]; + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + if (n > 0) write(g_diag_fd, buf, n); +} + +static inline uint32_t fnv1a32(const uint8_t* data, size_t len, size_t sample_limit = 0) { + if (!data || len == 0) return 0; + const size_t n = (sample_limit > 0 && len > sample_limit) ? sample_limit : len; + uint32_t h = 2166136261u; + for (size_t i = 0; i < n; ++i) { + h ^= data[i]; + h *= 16777619u; + } + return h; +} + +static inline uint32_t key_fingerprint32(const uint8_t key[32]) { + return fnv1a32(key, 32, 32); +} + +static inline uint32_t nonce_ts32(const uint8_t nonce[24]) { + return ((uint32_t)nonce[4] << 24) | + ((uint32_t)nonce[5] << 16) | + ((uint32_t)nonce[6] << 8) | + ((uint32_t)nonce[7]); +} + +static const char* media_type_name(cricket::MediaType media_type) { + switch (media_type) { + case cricket::MEDIA_TYPE_AUDIO: return "audio"; + case cricket::MEDIA_TYPE_VIDEO: return "video"; + case cricket::MEDIA_TYPE_DATA: return "data"; + case cricket::MEDIA_TYPE_UNSUPPORTED: return "unsupported"; + default: return "unknown"; + } +} + /* ── RTP helpers (for cases when additional_data is empty) ───── */ struct ParsedRtpPacket { @@ -72,6 +120,11 @@ struct GeneratedTsState { uint32_t next_step = 960; // 20 ms @ 48 kHz (default Opus packetization) }; +struct AdditionalTsState { + bool initialized = false; + uint64_t base_timestamp = 0; +}; + static inline uint16_t load16_be(const uint8_t* p) { return (uint16_t)(((uint16_t)p[0] << 8) | (uint16_t)p[1]); } @@ -83,6 +136,17 @@ static inline uint32_t load32_be(const uint8_t* p) { ((uint32_t)p[3]); } +static inline uint64_t load64_be(const uint8_t* p) { + return ((uint64_t)p[0] << 56) | + ((uint64_t)p[1] << 48) | + ((uint64_t)p[2] << 40) | + ((uint64_t)p[3] << 32) | + ((uint64_t)p[4] << 24) | + ((uint64_t)p[5] << 16) | + ((uint64_t)p[6] << 8) | + ((uint64_t)p[7]); +} + static bool parse_rtp_packet(const uint8_t* data, size_t len, ParsedRtpPacket* out) { if (!data || !out || len < 12) return false; @@ -133,6 +197,8 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data, state->probe_ssrc = packet.ssrc; state->probe_sequence = packet.sequence; state->probe_timestamp = packet.timestamp; + diag_event("RTP probe-start ssrc=%u seq=%u ts=%u hdr=%zu\n", + packet.ssrc, packet.sequence, packet.timestamp, packet.header_size); return false; } @@ -151,8 +217,12 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data, state->ssrc = packet.ssrc; state->last_sequence = packet.sequence; state->last_timestamp = packet.timestamp; + diag_event("RTP probe-lock ssrc=%u seq=%u ts=%u hdr=%zu\n", + packet.ssrc, packet.sequence, packet.timestamp, packet.header_size); } else { if (packet.ssrc != state->ssrc) { + diag_event("RTP probe-unlock reason=ssrc-change old=%u new=%u seq=%u ts=%u\n", + state->ssrc, packet.ssrc, packet.sequence, packet.timestamp); state->locked = false; state->has_probe = true; state->probe_ssrc = packet.ssrc; @@ -168,6 +238,8 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data, state->last_timestamp = packet.timestamp; } else if (seq_delta != 0) { // Not plausible for a continuous stream: re-probe. + diag_event("RTP probe-unlock reason=seq-jump delta=%u ssrc=%u last_seq=%u seq=%u ts=%u\n", + seq_delta, packet.ssrc, state->last_sequence, packet.sequence, packet.timestamp); state->locked = false; state->has_probe = true; state->probe_ssrc = packet.ssrc; @@ -188,14 +260,39 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data, static bool fill_nonce_from_additional_data(const uint8_t* data, size_t len, uint8_t nonce[24], - bool* used_rtp_header) { + bool* used_rtp_header, + AdditionalTsState* ts_state, + bool* used_relative_ts) { if (used_rtp_header) *used_rtp_header = false; + if (used_relative_ts) *used_relative_ts = false; if (!data || len < 8) return false; // Desktop-compatible path: additional_data contains encoded frame timestamp - // as 8-byte BE value. Use it directly as nonce[0..7]. + // as 8-byte BE value. On Android sender/receiver can have different absolute + // base in some pipelines; ts_state enables optional relative fallback. + // Primary desktop-compatible mode uses absolute timestamp (ts_state == nullptr). if (len == 8) { - memcpy(nonce, data, 8); + uint64_t ts64 = load64_be(data); + uint64_t ts_rel = ts64; + if (ts_state != nullptr) { + if (!ts_state->initialized) { + ts_state->initialized = true; + ts_state->base_timestamp = ts64; + } else if (ts64 < ts_state->base_timestamp) { + // New stream or reset; avoid huge wrapped nonce drift. + ts_state->base_timestamp = ts64; + } + ts_rel = ts64 - ts_state->base_timestamp; + if (used_relative_ts) *used_relative_ts = true; + } + nonce[0] = (uint8_t)(ts_rel >> 56); + nonce[1] = (uint8_t)(ts_rel >> 48); + nonce[2] = (uint8_t)(ts_rel >> 40); + nonce[3] = (uint8_t)(ts_rel >> 32); + nonce[4] = (uint8_t)(ts_rel >> 24); + nonce[5] = (uint8_t)(ts_rel >> 16); + nonce[6] = (uint8_t)(ts_rel >> 8); + nonce[7] = (uint8_t)(ts_rel); return true; } @@ -218,6 +315,18 @@ static bool fill_nonce_from_additional_data(const uint8_t* data, return true; } +static bool is_plausible_opus_packet(const uint8_t* packet, size_t len); + +static bool is_plausible_decrypted_audio_frame(const uint8_t* data, size_t len) { + if (!data || len == 0) return false; + + ParsedRtpPacket packet; + if (parse_rtp_packet(data, len, &packet) && packet.header_size < len) { + return is_plausible_opus_packet(data + packet.header_size, len - packet.header_size); + } + return is_plausible_opus_packet(data, len); +} + static inline void fill_nonce_from_ts32(uint32_t ts, uint8_t nonce[24]) { nonce[4] = (uint8_t)(ts >> 24); nonce[5] = (uint8_t)(ts >> 16); @@ -263,18 +372,94 @@ static uint32_t infer_opus_packet_duration_samples(const uint8_t* packet, size_t return total; } +static bool decode_opus_frame_len(const uint8_t* packet, + size_t len, + size_t* offset, + size_t* frame_len) { + if (!packet || !offset || !frame_len) return false; + if (*offset >= len) return false; + + const uint8_t b0 = packet[*offset]; + (*offset)++; + if (b0 < 252) { + *frame_len = b0; + return true; + } + if (*offset >= len) return false; + const uint8_t b1 = packet[*offset]; + (*offset)++; + *frame_len = (size_t)b1 * 4u + (size_t)b0; + return true; +} + static bool is_plausible_opus_packet(const uint8_t* packet, size_t len) { if (!packet || len == 0 || len > 2000) return false; + const uint8_t toc = packet[0]; const uint8_t config = (uint8_t)(toc >> 3); if (config > 31) return false; + const uint8_t frame_code = (uint8_t)(toc & 0x03); - if (frame_code != 3) return true; + const size_t payload_len = len - 1; + + if (frame_code == 0) { + // 1 frame, full payload + return payload_len >= 1; + } + + if (frame_code == 1) { + // 2 CBR frames, equal sizes. + if (payload_len < 2) return false; + return (payload_len % 2) == 0; + } + + if (frame_code == 2) { + // 2 VBR frames + size_t off = 1; + size_t len1 = 0; + if (!decode_opus_frame_len(packet, len, &off, &len1)) return false; + if (len1 == 0) return false; + if (off + len1 >= len) return false; // need non-empty second frame + const size_t len2 = len - off - len1; + if (len2 == 0) return false; + return true; + } + + // frame_code == 3: arbitrary number of frames if (len < 2) return false; - const uint8_t frame_count = (uint8_t)(packet[1] & 0x3F); + const uint8_t ch = packet[1]; + const bool vbr = (ch & 0x80) != 0; + const bool has_padding = (ch & 0x40) != 0; + const uint8_t frame_count = (uint8_t)(ch & 0x3F); if (frame_count == 0 || frame_count > 48) return false; + const uint32_t total = opus_base_frame_samples(config) * (uint32_t)frame_count; - return total <= 5760; + if (total > 5760) return false; + + // Padding bit is rarely used in live voice. Rejecting it improves + // discrimination between valid Opus and random decrypted noise. + if (has_padding) return false; + + if (!vbr) { + const size_t data_len = len - 2; + if (data_len < (size_t)frame_count) return false; + return (data_len % frame_count) == 0; + } + + // VBR: parse lengths for first N-1 frames; last frame consumes rest. + size_t off = 2; + size_t consumed = 0; + for (size_t i = 0; i + 1 < frame_count; ++i) { + size_t flen = 0; + if (!decode_opus_frame_len(packet, len, &off, &flen)) return false; + if (flen == 0) return false; + consumed += flen; + if (off + consumed >= len) return false; + } + const size_t remaining = len - off; + if (remaining <= consumed) return false; + const size_t last = remaining - consumed; + return last > 0; } /* ── Native crash handler — writes to file before dying ──────── */ @@ -314,8 +499,13 @@ class XChaCha20Encryptor final : public webrtc::FrameEncryptorInterface { public: explicit XChaCha20Encryptor(const uint8_t key[32]) { memcpy(key_, key, 32); + key_fingerprint_ = key_fingerprint32(key_); + LOGI("ENC init ptr=%p key_fp=%08x", this, key_fingerprint_); + diag_event("ENC init ptr=%p key_fp=%08x\n", this, key_fingerprint_); } + uint32_t KeyFingerprint() const { return key_fingerprint_; } + /* ── RefCountInterface ─────────────────────────────────────── */ void AddRef() const override { ref_.fetch_add(1, std::memory_order_relaxed); @@ -344,8 +534,8 @@ public: * If RTP header is found inside frame, we leave header bytes unencrypted * and encrypt only payload (desktop-compatible). */ - int Encrypt(cricket::MediaType /*media_type*/, - uint32_t /*ssrc*/, + int Encrypt(cricket::MediaType media_type, + uint32_t ssrc, rtc::ArrayView additional_data, rtc::ArrayView frame, rtc::ArrayView encrypted_frame, @@ -364,11 +554,14 @@ public: // Build nonce from RTP timestamp in additional_data (preferred). uint8_t nonce[24] = {0}; + bool additional_used_relative_ts = false; nonce_from_additional_data = fill_nonce_from_additional_data( additional_data.data(), additional_data.size(), nonce, - &additional_was_rtp_header); + &additional_was_rtp_header, + nullptr, + &additional_used_relative_ts); if (!nonce_from_additional_data) { nonce_from_rtp_header = fill_nonce_from_rtp_frame(frame.data(), frame.size(), &rtp_probe_, nonce, &header_size); @@ -381,6 +574,8 @@ public: nonce_from_generated_ts = true; generated_ts_used = generated_ts_.next_timestamp; fill_nonce_from_ts32(generated_ts_used, nonce); + diag_event("ENC fallback=generated-ts mt=%s ssrc=%u frame_sz=%zu ad_sz=%zu gen_ts=%u\n", + media_type_name(media_type), ssrc, frame.size(), additional_data.size(), generated_ts_used); } } @@ -409,23 +604,46 @@ public: generated_ts_.next_timestamp = generated_ts_used + step; } - // Diag: log first 3 frames + // Diag: log first frames with enough context for crash analysis. int n = diag_count_.fetch_add(1, std::memory_order_relaxed); - if (n < 3) { + if (n < kDiagFrameLimit) { + uint8_t ad_prefix[8] = {0}; + const size_t ad_copy = additional_data.size() < sizeof(ad_prefix) + ? additional_data.size() + : sizeof(ad_prefix); + if (ad_copy > 0) memcpy(ad_prefix, additional_data.data(), ad_copy); + ParsedRtpPacket rtp{}; + const bool has_rtp = parse_rtp_packet(frame.data(), frame.size(), &rtp); + const bool opus_plausible = + has_rtp && rtp.header_size < frame.size() + ? is_plausible_opus_packet(frame.data() + rtp.header_size, frame.size() - rtp.header_size) + : is_plausible_opus_packet(frame.data(), frame.size()); + const uint32_t in_hash = fnv1a32(frame.data(), frame.size(), kFrameHashSampleBytes); + const uint32_t out_hash = fnv1a32(encrypted_frame.data(), frame.size(), kFrameHashSampleBytes); const char* mode = nonce_from_rtp_header ? "rtp" : (nonce_from_generated_ts ? "gen" : (nonce_from_additional_data - ? (additional_was_rtp_header ? "ad-rtp" : "raw-abs") + ? (additional_was_rtp_header + ? "ad-rtp" + : (additional_used_relative_ts ? "raw-rel" : "raw-abs")) : "raw-abs")); - LOGI("ENC frame#%d: sz=%zu ad=%zu hdr=%zu mode=%s nonce=%02x%02x%02x%02x", - n, frame.size(), additional_data.size(), header_size, mode, - nonce[4], nonce[5], nonce[6], nonce[7]); - diag_write("ENC frame#%d: sz=%zu ad=%zu hdr=%zu mode=%s nonce[4..7]=%02x%02x%02x%02x\n", - n, frame.size(), additional_data.size(), header_size, mode, - nonce[4], nonce[5], nonce[6], nonce[7]); + LOGI("ENC frame#%d mt=%s ssrc=%u sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u rtp_ok=%d rtp_seq=%u rtp_ts=%u rtp_ssrc=%u opus_ok=%d key_fp=%08x in_h=%08x out_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x", + n, media_type_name(media_type), ssrc, frame.size(), additional_data.size(), header_size, mode, + nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step, + has_rtp ? 1 : 0, has_rtp ? rtp.sequence : 0, has_rtp ? rtp.timestamp : 0, has_rtp ? rtp.ssrc : 0, + opus_plausible ? 1 : 0, key_fingerprint_, in_hash, out_hash, + ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3], + ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]); + diag_write("ENC frame#%d mt=%s ssrc=%u sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u rtp_ok=%d rtp_seq=%u rtp_ts=%u rtp_ssrc=%u opus_ok=%d key_fp=%08x in_h=%08x out_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x\n", + n, media_type_name(media_type), ssrc, frame.size(), additional_data.size(), header_size, mode, + nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step, + has_rtp ? 1 : 0, has_rtp ? rtp.sequence : 0, has_rtp ? rtp.timestamp : 0, has_rtp ? rtp.ssrc : 0, + opus_plausible ? 1 : 0, key_fingerprint_, in_hash, out_hash, + ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3], + ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]); } return 0; } @@ -435,13 +653,17 @@ public: } protected: - ~XChaCha20Encryptor() override { memset(key_, 0, 32); } + ~XChaCha20Encryptor() override { + diag_event("ENC destroy ptr=%p key_fp=%08x\n", this, key_fingerprint_); + memset(key_, 0, 32); + } private: mutable std::atomic ref_{0}; mutable std::atomic diag_count_{0}; mutable RtpProbeState rtp_probe_; mutable GeneratedTsState generated_ts_; + uint32_t key_fingerprint_ = 0; uint8_t key_[32]; }; @@ -453,8 +675,13 @@ class XChaCha20Decryptor final : public webrtc::FrameDecryptorInterface { public: explicit XChaCha20Decryptor(const uint8_t key[32]) { memcpy(key_, key, 32); + key_fingerprint_ = key_fingerprint32(key_); + LOGI("DEC init ptr=%p key_fp=%08x", this, key_fingerprint_); + diag_event("DEC init ptr=%p key_fp=%08x\n", this, key_fingerprint_); } + uint32_t KeyFingerprint() const { return key_fingerprint_; } + /* ── RefCountInterface ─────────────────────────────────────── */ void AddRef() const override { ref_.fetch_add(1, std::memory_order_relaxed); @@ -474,8 +701,8 @@ public: * - if RTP header is present inside encrypted_frame (fallback path), * keep header bytes untouched and decrypt payload only. */ - Result Decrypt(cricket::MediaType /*media_type*/, - const std::vector& /*csrcs*/, + Result Decrypt(cricket::MediaType media_type, + const std::vector& csrcs, rtc::ArrayView additional_data, rtc::ArrayView encrypted_frame, rtc::ArrayView frame) override { @@ -485,12 +712,17 @@ public: bool nonce_from_generated_ts = false; bool nonce_from_additional_data = false; bool additional_was_rtp_header = false; + bool additional_used_relative_ts = false; + bool used_additional_relative_fallback = false; + bool used_plain_passthrough = false; uint32_t generated_ts_used = 0; nonce_from_additional_data = fill_nonce_from_additional_data( additional_data.data(), additional_data.size(), nonce, - &additional_was_rtp_header); + &additional_was_rtp_header, + nullptr, + &additional_used_relative_ts); if (!nonce_from_additional_data) { nonce_from_rtp_header = fill_nonce_from_rtp_frame(encrypted_frame.data(), encrypted_frame.size(), &rtp_probe_, nonce, &header_size); @@ -503,6 +735,8 @@ public: nonce_from_generated_ts = true; generated_ts_used = generated_ts_.next_timestamp; fill_nonce_from_ts32(generated_ts_used, nonce); + diag_event("DEC fallback=generated-ts mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu gen_ts=%u\n", + media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), generated_ts_used); } } @@ -527,6 +761,53 @@ public: rosetta_xchacha20_xor(frame.data(), encrypted_frame.data(), encrypted_frame.size(), nonce, key_); } + if (nonce_from_additional_data) { + bool plausible = is_plausible_decrypted_audio_frame(frame.data(), encrypted_frame.size()); + + // Fallback for Android pipelines where additional_data timestamps are + // stream-relative while remote side uses a different absolute base. + if (!plausible && additional_data.size() == 8 && !additional_was_rtp_header) { + uint8_t nonce_rel[24] = {0}; + bool rel_used = false; + if (fill_nonce_from_additional_data( + additional_data.data(), + additional_data.size(), + nonce_rel, + nullptr, + &additional_rel_ts_state_, + &rel_used) && + rel_used) { + std::vector candidate(encrypted_frame.size()); + rosetta_xchacha20_xor( + candidate.data(), + encrypted_frame.data(), + encrypted_frame.size(), + nonce_rel, + key_); + if (is_plausible_decrypted_audio_frame(candidate.data(), candidate.size())) { + memcpy(frame.data(), candidate.data(), candidate.size()); + memcpy(nonce, nonce_rel, sizeof(nonce)); + plausible = true; + used_additional_relative_fallback = true; + additional_used_relative_ts = true; + diag_event("DEC fallback=relative-ad-ts mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu nonce_ts=%u\n", + media_type_name(media_type), csrcs.size(), encrypted_frame.size(), + additional_data.size(), nonce_ts32(nonce)); + } + } + } + + // If payload already looks like valid Opus, keep plaintext. + // This protects interop when peer stream is unexpectedly unencrypted. + if (!plausible && + is_plausible_decrypted_audio_frame(encrypted_frame.data(), encrypted_frame.size())) { + memcpy(frame.data(), encrypted_frame.data(), encrypted_frame.size()); + used_plain_passthrough = true; + diag_event("DEC fallback=plain-passthrough mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu\n", + media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size()); + } + } + if (nonce_from_generated_ts) { bool plausible = is_plausible_opus_packet(frame.data(), encrypted_frame.size()); @@ -548,6 +829,9 @@ public: generated_ts_used = ts_try; used_generated_resync = true; plausible = true; + diag_event("DEC fallback=generated-resync mt=%s csrcs=%zu enc_sz=%zu ts_try=%u step=%u probe=%u\n", + media_type_name(media_type), csrcs.size(), encrypted_frame.size(), + ts_try, generated_ts_.next_step, i); break; } } @@ -558,25 +842,68 @@ public: generated_ts_.next_timestamp = generated_ts_used + step; } - // Diag: log first 3 frames + // Diag: log first frames with enough context for crash analysis. int n = diag_count_.fetch_add(1, std::memory_order_relaxed); - if (n < 3) { + if (n < kDiagFrameLimit) { + uint8_t ad_prefix[8] = {0}; + const size_t ad_copy = additional_data.size() < sizeof(ad_prefix) + ? additional_data.size() + : sizeof(ad_prefix); + if (ad_copy > 0) memcpy(ad_prefix, additional_data.data(), ad_copy); + ParsedRtpPacket enc_rtp{}; + ParsedRtpPacket dec_rtp{}; + const bool has_enc_rtp = parse_rtp_packet(encrypted_frame.data(), encrypted_frame.size(), &enc_rtp); + const bool has_dec_rtp = parse_rtp_packet(frame.data(), encrypted_frame.size(), &dec_rtp); + const bool dec_plausible = is_plausible_decrypted_audio_frame(frame.data(), encrypted_frame.size()); + const uint32_t enc_hash = fnv1a32(encrypted_frame.data(), encrypted_frame.size(), kFrameHashSampleBytes); + const uint32_t dec_hash = fnv1a32(frame.data(), encrypted_frame.size(), kFrameHashSampleBytes); const char* mode = nullptr; if (nonce_from_rtp_header) { mode = "rtp"; } else if (nonce_from_generated_ts) { mode = used_generated_resync ? "gen-resync" : "gen"; } else if (nonce_from_additional_data) { - mode = additional_was_rtp_header ? "ad-rtp" : "raw-abs"; + if (used_plain_passthrough) mode = "raw-plain"; + else if (additional_was_rtp_header) mode = "ad-rtp"; + else if (used_additional_relative_fallback) mode = "raw-rel-fb"; + else mode = additional_used_relative_ts ? "raw-rel" : "raw-abs"; } else { mode = "raw-abs"; } - LOGI("DEC frame#%d: enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce=%02x%02x%02x%02x", - n, encrypted_frame.size(), additional_data.size(), header_size, mode, - nonce[4], nonce[5], nonce[6], nonce[7]); - diag_write("DEC frame#%d: enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce[4..7]=%02x%02x%02x%02x\n", - n, encrypted_frame.size(), additional_data.size(), header_size, mode, - nonce[4], nonce[5], nonce[6], nonce[7]); + uint32_t bad_streak = 0; + if (!dec_plausible) { + bad_streak = bad_audio_streak_.fetch_add(1, std::memory_order_relaxed) + 1; + if (bad_streak == 1 || bad_streak == 3 || bad_streak == 10 || (bad_streak % 50) == 0) { + diag_event("DEC degraded mt=%s csrcs=%zu mode=%s bad_streak=%u nonce_ts=%u key_fp=%08x\n", + media_type_name(media_type), csrcs.size(), mode, bad_streak, + nonce_ts32(nonce), key_fingerprint_); + } + } else { + const uint32_t prev_bad = bad_audio_streak_.exchange(0, std::memory_order_relaxed); + if (prev_bad >= 3) { + diag_event("DEC recovered mt=%s csrcs=%zu mode=%s prev_bad_streak=%u nonce_ts=%u key_fp=%08x\n", + media_type_name(media_type), csrcs.size(), mode, prev_bad, + nonce_ts32(nonce), key_fingerprint_); + } + } + LOGI("DEC frame#%d mt=%s csrcs=%zu enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u dec_ok=%d bad_streak=%u enc_rtp=%d enc_seq=%u enc_ts=%u enc_ssrc=%u dec_rtp=%d dec_seq=%u dec_ts=%u dec_ssrc=%u key_fp=%08x enc_h=%08x dec_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x", + n, media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), header_size, mode, + nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step, + dec_plausible ? 1 : 0, bad_streak, + has_enc_rtp ? 1 : 0, has_enc_rtp ? enc_rtp.sequence : 0, has_enc_rtp ? enc_rtp.timestamp : 0, has_enc_rtp ? enc_rtp.ssrc : 0, + has_dec_rtp ? 1 : 0, has_dec_rtp ? dec_rtp.sequence : 0, has_dec_rtp ? dec_rtp.timestamp : 0, has_dec_rtp ? dec_rtp.ssrc : 0, + key_fingerprint_, enc_hash, dec_hash, + ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3], + ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]); + diag_write("DEC frame#%d mt=%s csrcs=%zu enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u dec_ok=%d bad_streak=%u enc_rtp=%d enc_seq=%u enc_ts=%u enc_ssrc=%u dec_rtp=%d dec_seq=%u dec_ts=%u dec_ssrc=%u key_fp=%08x enc_h=%08x dec_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x\n", + n, media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), header_size, mode, + nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step, + dec_plausible ? 1 : 0, bad_streak, + has_enc_rtp ? 1 : 0, has_enc_rtp ? enc_rtp.sequence : 0, has_enc_rtp ? enc_rtp.timestamp : 0, has_enc_rtp ? enc_rtp.ssrc : 0, + has_dec_rtp ? 1 : 0, has_dec_rtp ? dec_rtp.sequence : 0, has_dec_rtp ? dec_rtp.timestamp : 0, has_dec_rtp ? dec_rtp.ssrc : 0, + key_fingerprint_, enc_hash, dec_hash, + ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3], + ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]); } return {Result::Status::kOk, encrypted_frame.size()}; @@ -587,13 +914,20 @@ public: } protected: - ~XChaCha20Decryptor() override { memset(key_, 0, 32); } + ~XChaCha20Decryptor() override { + diag_event("DEC destroy ptr=%p key_fp=%08x bad_streak=%u\n", + this, key_fingerprint_, bad_audio_streak_.load(std::memory_order_relaxed)); + memset(key_, 0, 32); + } private: mutable std::atomic ref_{0}; mutable std::atomic diag_count_{0}; + mutable std::atomic bad_audio_streak_{0}; mutable RtpProbeState rtp_probe_; mutable GeneratedTsState generated_ts_; + mutable AdditionalTsState additional_rel_ts_state_; + uint32_t key_fingerprint_ = 0; uint8_t key_[32]; }; @@ -633,9 +967,14 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateEncryptor( JNIEnv *env, jclass, jbyteArray jKey) { jsize len = env->GetArrayLength(jKey); - if (len < 32) return 0; + if (len < 32) { + LOGE("Create encryptor failed: key length=%d (<32)", (int)len); + diag_event("ENC create-failed key_len=%d\n", (int)len); + return 0; + } auto *key = (uint8_t *)env->GetByteArrayElements(jKey, nullptr); + const uint32_t key_fp = key_fingerprint32(key); auto *enc = new XChaCha20Encryptor(key); env->ReleaseByteArrayElements(jKey, (jbyte *)key, JNI_ABORT); @@ -643,7 +982,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateEncryptor( // WebRTC's scoped_refptr will AddRef again when it takes ownership. enc->AddRef(); - LOGI("Created XChaCha20 encryptor %p", enc); + LOGI("Created XChaCha20 encryptor %p key_fp=%08x", enc, key_fp); + diag_event("ENC created ptr=%p key_fp=%08x key_len=%d\n", enc, key_fp, (int)len); return reinterpret_cast(enc); } @@ -653,6 +993,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeReleaseEncryptor( { if (ptr == 0) return; auto *enc = reinterpret_cast(ptr); + LOGI("Release XChaCha20 encryptor %p key_fp=%08x", enc, enc->KeyFingerprint()); + diag_event("ENC release ptr=%p key_fp=%08x\n", enc, enc->KeyFingerprint()); enc->Release(); } @@ -663,15 +1005,21 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateDecryptor( JNIEnv *env, jclass, jbyteArray jKey) { jsize len = env->GetArrayLength(jKey); - if (len < 32) return 0; + if (len < 32) { + LOGE("Create decryptor failed: key length=%d (<32)", (int)len); + diag_event("DEC create-failed key_len=%d\n", (int)len); + return 0; + } auto *key = (uint8_t *)env->GetByteArrayElements(jKey, nullptr); + const uint32_t key_fp = key_fingerprint32(key); auto *dec = new XChaCha20Decryptor(key); env->ReleaseByteArrayElements(jKey, (jbyte *)key, JNI_ABORT); dec->AddRef(); - LOGI("Created XChaCha20 decryptor %p", dec); + LOGI("Created XChaCha20 decryptor %p key_fp=%08x", dec, key_fp); + diag_event("DEC created ptr=%p key_fp=%08x key_len=%d\n", dec, key_fp, (int)len); return reinterpret_cast(dec); } @@ -681,6 +1029,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeReleaseDecryptor( { if (ptr == 0) return; auto *dec = reinterpret_cast(ptr); + LOGI("Release XChaCha20 decryptor %p key_fp=%08x", dec, dec->KeyFingerprint()); + diag_event("DEC release ptr=%p key_fp=%08x\n", dec, dec->KeyFingerprint()); dec->Release(); } @@ -712,6 +1062,7 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeOpenDiagFile( JNIEnv *env, jclass, jstring jPath) { if (g_diag_fd >= 0) { close(g_diag_fd); g_diag_fd = -1; } + g_diag_event_count.store(0, std::memory_order_relaxed); const char *path = env->GetStringUTFChars(jPath, nullptr); strncpy(g_diag_path, path, sizeof(g_diag_path) - 1); @@ -719,8 +1070,9 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeOpenDiagFile( env->ReleaseStringUTFChars(jPath, path); if (g_diag_fd >= 0) { - diag_write("=== E2EE DIAGNOSTICS ===\n"); + diag_write("=== E2EE DIAGNOSTICS pid=%d ===\n", (int)getpid()); LOGI("Diag file opened: %s", g_diag_path); + diag_event("DIAG open path=%s\n", g_diag_path); } else { LOGE("Failed to open diag file: %s", g_diag_path); } @@ -731,6 +1083,7 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCloseDiagFile( JNIEnv *, jclass) { if (g_diag_fd >= 0) { + diag_event("DIAG close path=%s\n", g_diag_path); diag_write("=== END ===\n"); close(g_diag_fd); g_diag_fd = -1;