From 0af4e6587eb65993a939d773b558feb9ac52006a Mon Sep 17 00:00:00 2001
From: k1ngsterr1 <ruslanmakhmatov@gmail.com>
Date: Thu, 26 Mar 2026 21:27:17 +0500
Subject: [PATCH] android: expand native e2ee diagnostics

---
 app/src/main/cpp/rosetta_e2ee.cpp | 427 +++++++++++++++++++++++++++---
 1 file changed, 390 insertions(+), 37 deletions(-)
diff --git a/app/src/main/cpp/rosetta_e2ee.cpp b/app/src/main/cpp/rosetta_e2ee.cpp
index ed7fc4e..320876b 100644
--- a/app/src/main/cpp/rosetta_e2ee.cpp
+++ b/app/src/main/cpp/rosetta_e2ee.cpp
@@ -34,6 +34,10 @@
 
 static char g_diag_path[512] = {0};
 static int  g_diag_fd = -1;
+static std::atomic<int> g_diag_event_count{0};
+static constexpr int kDiagEventLimit = 4000;
+static constexpr int kDiagFrameLimit = 400;
+static constexpr size_t kFrameHashSampleBytes = 320;
 
 static void diag_write(const char *fmt, ...) {
     if (g_diag_fd < 0) return;
@@ -45,6 +49,50 @@ static void diag_write(const char *fmt, ...) {
     if (n > 0) write(g_diag_fd, buf, n);
 }
 
+static void diag_event(const char *fmt, ...) {
+    if (g_diag_fd < 0) return;
+    const int idx = g_diag_event_count.fetch_add(1, std::memory_order_relaxed);
+    if (idx >= kDiagEventLimit) return;
+    char buf[640];
+    va_list ap;
+    va_start(ap, fmt);
+    int n = vsnprintf(buf, sizeof(buf), fmt, ap);
+    va_end(ap);
+    if (n > 0) write(g_diag_fd, buf, n);
+}
+
+static inline uint32_t fnv1a32(const uint8_t* data, size_t len, size_t sample_limit = 0) {
+    if (!data || len == 0) return 0;
+    const size_t n = (sample_limit > 0 && len > sample_limit) ? sample_limit : len;
+    uint32_t h = 2166136261u;
+    for (size_t i = 0; i < n; ++i) {
+        h ^= data[i];
+        h *= 16777619u;
+    }
+    return h;
+}
+
+static inline uint32_t key_fingerprint32(const uint8_t key[32]) {
+    return fnv1a32(key, 32, 32);
+}
+
+static inline uint32_t nonce_ts32(const uint8_t nonce[24]) {
+    return ((uint32_t)nonce[4] << 24) |
+           ((uint32_t)nonce[5] << 16) |
+           ((uint32_t)nonce[6] << 8) |
+           ((uint32_t)nonce[7]);
+}
+
+static const char* media_type_name(cricket::MediaType media_type) {
+    switch (media_type) {
+        case cricket::MEDIA_TYPE_AUDIO: return "audio";
+        case cricket::MEDIA_TYPE_VIDEO: return "video";
+        case cricket::MEDIA_TYPE_DATA: return "data";
+        case cricket::MEDIA_TYPE_UNSUPPORTED: return "unsupported";
+        default: return "unknown";
+    }
+}
+
 /* ── RTP helpers (for cases when additional_data is empty) ───── */
 
 struct ParsedRtpPacket {
@@ -72,6 +120,11 @@ struct GeneratedTsState {
     uint32_t next_step = 960;  // 20 ms @ 48 kHz (default Opus packetization)
 };
 
+struct AdditionalTsState {
+    bool initialized = false;
+    uint64_t base_timestamp = 0;
+};
+
 static inline uint16_t load16_be(const uint8_t* p) {
     return (uint16_t)(((uint16_t)p[0] << 8) | (uint16_t)p[1]);
 }
@@ -83,6 +136,17 @@ static inline uint32_t load32_be(const uint8_t* p) {
            ((uint32_t)p[3]);
 }
 
+static inline uint64_t load64_be(const uint8_t* p) {
+    return ((uint64_t)p[0] << 56) |
+           ((uint64_t)p[1] << 48) |
+           ((uint64_t)p[2] << 40) |
+           ((uint64_t)p[3] << 32) |
+           ((uint64_t)p[4] << 24) |
+           ((uint64_t)p[5] << 16) |
+           ((uint64_t)p[6] << 8) |
+           ((uint64_t)p[7]);
+}
+
 
 static bool parse_rtp_packet(const uint8_t* data, size_t len, ParsedRtpPacket* out) {
     if (!data || !out || len < 12) return false;
@@ -133,6 +197,8 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data,
             state->probe_ssrc = packet.ssrc;
             state->probe_sequence = packet.sequence;
             state->probe_timestamp = packet.timestamp;
+            diag_event("RTP probe-start ssrc=%u seq=%u ts=%u hdr=%zu\n",
+                       packet.ssrc, packet.sequence, packet.timestamp, packet.header_size);
             return false;
         }
 
@@ -151,8 +217,12 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data,
         state->ssrc = packet.ssrc;
         state->last_sequence = packet.sequence;
         state->last_timestamp = packet.timestamp;
+        diag_event("RTP probe-lock ssrc=%u seq=%u ts=%u hdr=%zu\n",
+                   packet.ssrc, packet.sequence, packet.timestamp, packet.header_size);
     } else {
         if (packet.ssrc != state->ssrc) {
+            diag_event("RTP probe-unlock reason=ssrc-change old=%u new=%u seq=%u ts=%u\n",
+                       state->ssrc, packet.ssrc, packet.sequence, packet.timestamp);
             state->locked = false;
             state->has_probe = true;
             state->probe_ssrc = packet.ssrc;
@@ -168,6 +238,8 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data,
             state->last_timestamp = packet.timestamp;
         } else if (seq_delta != 0) {
             // Not plausible for a continuous stream: re-probe.
+            diag_event("RTP probe-unlock reason=seq-jump delta=%u ssrc=%u last_seq=%u seq=%u ts=%u\n",
+                       seq_delta, packet.ssrc, state->last_sequence, packet.sequence, packet.timestamp);
             state->locked = false;
             state->has_probe = true;
             state->probe_ssrc = packet.ssrc;
@@ -188,14 +260,39 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data,
 static bool fill_nonce_from_additional_data(const uint8_t* data,
                                             size_t len,
                                             uint8_t nonce[24],
-                                            bool* used_rtp_header) {
+                                            bool* used_rtp_header,
+                                            AdditionalTsState* ts_state,
+                                            bool* used_relative_ts) {
     if (used_rtp_header) *used_rtp_header = false;
+    if (used_relative_ts) *used_relative_ts = false;
     if (!data || len < 8) return false;
 
     // Desktop-compatible path: additional_data contains encoded frame timestamp
-    // as 8-byte BE value. Use it directly as nonce[0..7].
+    // as 8-byte BE value. On Android sender/receiver can have different absolute
+    // base in some pipelines; ts_state enables optional relative fallback.
+    // Primary desktop-compatible mode uses absolute timestamp (ts_state == nullptr).
     if (len == 8) {
-        memcpy(nonce, data, 8);
+        uint64_t ts64 = load64_be(data);
+        uint64_t ts_rel = ts64;
+        if (ts_state != nullptr) {
+            if (!ts_state->initialized) {
+                ts_state->initialized = true;
+                ts_state->base_timestamp = ts64;
+            } else if (ts64 < ts_state->base_timestamp) {
+                // New stream or reset; avoid huge wrapped nonce drift.
+                ts_state->base_timestamp = ts64;
+            }
+            ts_rel = ts64 - ts_state->base_timestamp;
+            if (used_relative_ts) *used_relative_ts = true;
+        }
+        nonce[0] = (uint8_t)(ts_rel >> 56);
+        nonce[1] = (uint8_t)(ts_rel >> 48);
+        nonce[2] = (uint8_t)(ts_rel >> 40);
+        nonce[3] = (uint8_t)(ts_rel >> 32);
+        nonce[4] = (uint8_t)(ts_rel >> 24);
+        nonce[5] = (uint8_t)(ts_rel >> 16);
+        nonce[6] = (uint8_t)(ts_rel >> 8);
+        nonce[7] = (uint8_t)(ts_rel);
         return true;
     }
 
@@ -218,6 +315,18 @@ static bool fill_nonce_from_additional_data(const uint8_t* data,
     return true;
 }
 
+static bool is_plausible_opus_packet(const uint8_t* packet, size_t len);
+
+static bool is_plausible_decrypted_audio_frame(const uint8_t* data, size_t len) {
+    if (!data || len == 0) return false;
+
+    ParsedRtpPacket packet;
+    if (parse_rtp_packet(data, len, &packet) && packet.header_size < len) {
+        return is_plausible_opus_packet(data + packet.header_size, len - packet.header_size);
+    }
+    return is_plausible_opus_packet(data, len);
+}
+
 static inline void fill_nonce_from_ts32(uint32_t ts, uint8_t nonce[24]) {
     nonce[4] = (uint8_t)(ts >> 24);
     nonce[5] = (uint8_t)(ts >> 16);
@@ -263,18 +372,94 @@ static uint32_t infer_opus_packet_duration_samples(const uint8_t* packet, size_t
     return total;
 }
 
+static bool decode_opus_frame_len(const uint8_t* packet,
+                                  size_t len,
+                                  size_t* offset,
+                                  size_t* frame_len) {
+    if (!packet || !offset || !frame_len) return false;
+    if (*offset >= len) return false;
+
+    const uint8_t b0 = packet[*offset];
+    (*offset)++;
+    if (b0 < 252) {
+        *frame_len = b0;
+        return true;
+    }
+    if (*offset >= len) return false;
+    const uint8_t b1 = packet[*offset];
+    (*offset)++;
+    *frame_len = (size_t)b1 * 4u + (size_t)b0;
+    return true;
+}
+
 static bool is_plausible_opus_packet(const uint8_t* packet, size_t len) {
     if (!packet || len == 0 || len > 2000) return false;
+
     const uint8_t toc = packet[0];
     const uint8_t config = (uint8_t)(toc >> 3);
     if (config > 31) return false;
+
     const uint8_t frame_code = (uint8_t)(toc & 0x03);
-    if (frame_code != 3) return true;
+    const size_t payload_len = len - 1;
+
+    if (frame_code == 0) {
+        // 1 frame, full payload
+        return payload_len >= 1;
+    }
+
+    if (frame_code == 1) {
+        // 2 CBR frames, equal sizes.
+        if (payload_len < 2) return false;
+        return (payload_len % 2) == 0;
+    }
+
+    if (frame_code == 2) {
+        // 2 VBR frames
+        size_t off = 1;
+        size_t len1 = 0;
+        if (!decode_opus_frame_len(packet, len, &off, &len1)) return false;
+        if (len1 == 0) return false;
+        if (off + len1 >= len) return false;  // need non-empty second frame
+        const size_t len2 = len - off - len1;
+        if (len2 == 0) return false;
+        return true;
+    }
+
+    // frame_code == 3: arbitrary number of frames
     if (len < 2) return false;
-    const uint8_t frame_count = (uint8_t)(packet[1] & 0x3F);
+    const uint8_t ch = packet[1];
+    const bool vbr = (ch & 0x80) != 0;
+    const bool has_padding = (ch & 0x40) != 0;
+    const uint8_t frame_count = (uint8_t)(ch & 0x3F);
     if (frame_count == 0 || frame_count > 48) return false;
+
     const uint32_t total = opus_base_frame_samples(config) * (uint32_t)frame_count;
-    return total <= 5760;
+    if (total > 5760) return false;
+
+    // Padding bit is rarely used in live voice. Rejecting it improves
+    // discrimination between valid Opus and random decrypted noise.
+    if (has_padding) return false;
+
+    if (!vbr) {
+        const size_t data_len = len - 2;
+        if (data_len < (size_t)frame_count) return false;
+        return (data_len % frame_count) == 0;
+    }
+
+    // VBR: parse lengths for first N-1 frames; last frame consumes rest.
+    size_t off = 2;
+    size_t consumed = 0;
+    for (size_t i = 0; i + 1 < frame_count; ++i) {
+        size_t flen = 0;
+        if (!decode_opus_frame_len(packet, len, &off, &flen)) return false;
+        if (flen == 0) return false;
+        consumed += flen;
+        if (off + consumed >= len) return false;
+    }
+    const size_t remaining = len - off;
+    if (remaining <= consumed) return false;
+    const size_t last = remaining - consumed;
+    return last > 0;
 }
 
 /* ── Native crash handler — writes to file before dying ──────── */
@@ -314,8 +499,13 @@ class XChaCha20Encryptor final : public webrtc::FrameEncryptorInterface {
 public:
     explicit XChaCha20Encryptor(const uint8_t key[32]) {
         memcpy(key_, key, 32);
+        key_fingerprint_ = key_fingerprint32(key_);
+        LOGI("ENC init ptr=%p key_fp=%08x", this, key_fingerprint_);
+        diag_event("ENC init ptr=%p key_fp=%08x\n", this, key_fingerprint_);
     }
 
+    uint32_t KeyFingerprint() const { return key_fingerprint_; }
+
     /* ── RefCountInterface ─────────────────────────────────────── */
     void AddRef() const override {
         ref_.fetch_add(1, std::memory_order_relaxed);
@@ -344,8 +534,8 @@ public:
      * If RTP header is found inside frame, we leave header bytes unencrypted
      * and encrypt only payload (desktop-compatible).
      */
-    int Encrypt(cricket::MediaType /*media_type*/,
-                uint32_t /*ssrc*/,
+    int Encrypt(cricket::MediaType media_type,
+                uint32_t ssrc,
                 rtc::ArrayView<const uint8_t> additional_data,
                 rtc::ArrayView<const uint8_t> frame,
                 rtc::ArrayView<uint8_t> encrypted_frame,
@@ -364,11 +554,14 @@ public:
 
         // Build nonce from RTP timestamp in additional_data (preferred).
         uint8_t nonce[24] = {0};
+        bool additional_used_relative_ts = false;
         nonce_from_additional_data = fill_nonce_from_additional_data(
             additional_data.data(),
             additional_data.size(),
             nonce,
-            &additional_was_rtp_header);
+            &additional_was_rtp_header,
+            nullptr,
+            &additional_used_relative_ts);
         if (!nonce_from_additional_data) {
             nonce_from_rtp_header =
                 fill_nonce_from_rtp_frame(frame.data(), frame.size(), &rtp_probe_, nonce, &header_size);
@@ -381,6 +574,8 @@ public:
                 nonce_from_generated_ts = true;
                 generated_ts_used = generated_ts_.next_timestamp;
                 fill_nonce_from_ts32(generated_ts_used, nonce);
+                diag_event("ENC fallback=generated-ts mt=%s ssrc=%u frame_sz=%zu ad_sz=%zu gen_ts=%u\n",
+                           media_type_name(media_type), ssrc, frame.size(), additional_data.size(), generated_ts_used);
             }
         }
 
@@ -409,23 +604,46 @@ public:
             generated_ts_.next_timestamp = generated_ts_used + step;
         }
 
-        // Diag: log first 3 frames
+        // Diag: log first frames with enough context for crash analysis.
         int n = diag_count_.fetch_add(1, std::memory_order_relaxed);
-        if (n < 3) {
+        if (n < kDiagFrameLimit) {
+            uint8_t ad_prefix[8] = {0};
+            const size_t ad_copy = additional_data.size() < sizeof(ad_prefix)
+                                       ? additional_data.size()
+                                       : sizeof(ad_prefix);
+            if (ad_copy > 0) memcpy(ad_prefix, additional_data.data(), ad_copy);
+            ParsedRtpPacket rtp{};
+            const bool has_rtp = parse_rtp_packet(frame.data(), frame.size(), &rtp);
+            const bool opus_plausible =
+                has_rtp && rtp.header_size < frame.size()
+                    ? is_plausible_opus_packet(frame.data() + rtp.header_size, frame.size() - rtp.header_size)
+                    : is_plausible_opus_packet(frame.data(), frame.size());
+            const uint32_t in_hash = fnv1a32(frame.data(), frame.size(), kFrameHashSampleBytes);
+            const uint32_t out_hash = fnv1a32(encrypted_frame.data(), frame.size(), kFrameHashSampleBytes);
             const char* mode =
                 nonce_from_rtp_header
                     ? "rtp"
                     : (nonce_from_generated_ts
                            ? "gen"
                            : (nonce_from_additional_data
-                                  ? (additional_was_rtp_header ? "ad-rtp" : "raw-abs")
+                                  ? (additional_was_rtp_header
+                                         ? "ad-rtp"
+                                         : (additional_used_relative_ts ? "raw-rel" : "raw-abs"))
                                   : "raw-abs"));
-            LOGI("ENC frame#%d: sz=%zu ad=%zu hdr=%zu mode=%s nonce=%02x%02x%02x%02x",
-                 n, frame.size(), additional_data.size(), header_size, mode,
-                 nonce[4], nonce[5], nonce[6], nonce[7]);
-            diag_write("ENC frame#%d: sz=%zu ad=%zu hdr=%zu mode=%s nonce[4..7]=%02x%02x%02x%02x\n",
-                       n, frame.size(), additional_data.size(), header_size, mode,
-                       nonce[4], nonce[5], nonce[6], nonce[7]);
+            LOGI("ENC frame#%d mt=%s ssrc=%u sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u rtp_ok=%d rtp_seq=%u rtp_ts=%u rtp_ssrc=%u opus_ok=%d key_fp=%08x in_h=%08x out_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x",
+                 n, media_type_name(media_type), ssrc, frame.size(), additional_data.size(), header_size, mode,
+                 nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
+                 has_rtp ? 1 : 0, has_rtp ? rtp.sequence : 0, has_rtp ? rtp.timestamp : 0, has_rtp ? rtp.ssrc : 0,
+                 opus_plausible ? 1 : 0, key_fingerprint_, in_hash, out_hash,
+                 ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
+                 ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
+            diag_write("ENC frame#%d mt=%s ssrc=%u sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u rtp_ok=%d rtp_seq=%u rtp_ts=%u rtp_ssrc=%u opus_ok=%d key_fp=%08x in_h=%08x out_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x\n",
+                       n, media_type_name(media_type), ssrc, frame.size(), additional_data.size(), header_size, mode,
+                       nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
+                       has_rtp ? 1 : 0, has_rtp ? rtp.sequence : 0, has_rtp ? rtp.timestamp : 0, has_rtp ? rtp.ssrc : 0,
+                       opus_plausible ? 1 : 0, key_fingerprint_, in_hash, out_hash,
+                       ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
+                       ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
         }
         return 0;
     }
@@ -435,13 +653,17 @@ public:
     }
 
 protected:
-    ~XChaCha20Encryptor() override { memset(key_, 0, 32); }
+    ~XChaCha20Encryptor() override {
+        diag_event("ENC destroy ptr=%p key_fp=%08x\n", this, key_fingerprint_);
+        memset(key_, 0, 32);
+    }
 
 private:
     mutable std::atomic<int> ref_{0};
     mutable std::atomic<int> diag_count_{0};
     mutable RtpProbeState rtp_probe_;
     mutable GeneratedTsState generated_ts_;
+    uint32_t key_fingerprint_ = 0;
     uint8_t key_[32];
 };
 
@@ -453,8 +675,13 @@ class XChaCha20Decryptor final : public webrtc::FrameDecryptorInterface {
 public:
     explicit XChaCha20Decryptor(const uint8_t key[32]) {
         memcpy(key_, key, 32);
+        key_fingerprint_ = key_fingerprint32(key_);
+        LOGI("DEC init ptr=%p key_fp=%08x", this, key_fingerprint_);
+        diag_event("DEC init ptr=%p key_fp=%08x\n", this, key_fingerprint_);
     }
 
+    uint32_t KeyFingerprint() const { return key_fingerprint_; }
+
     /* ── RefCountInterface ─────────────────────────────────────── */
     void AddRef() const override {
         ref_.fetch_add(1, std::memory_order_relaxed);
@@ -474,8 +701,8 @@ public:
      * - if RTP header is present inside encrypted_frame (fallback path),
      *   keep header bytes untouched and decrypt payload only.
      */
-    Result Decrypt(cricket::MediaType /*media_type*/,
-                   const std::vector<uint32_t>& /*csrcs*/,
+    Result Decrypt(cricket::MediaType media_type,
+                   const std::vector<uint32_t>& csrcs,
                    rtc::ArrayView<const uint8_t> additional_data,
                    rtc::ArrayView<const uint8_t> encrypted_frame,
                    rtc::ArrayView<uint8_t> frame) override {
@@ -485,12 +712,17 @@ public:
         bool nonce_from_generated_ts = false;
         bool nonce_from_additional_data = false;
         bool additional_was_rtp_header = false;
+        bool additional_used_relative_ts = false;
+        bool used_additional_relative_fallback = false;
+        bool used_plain_passthrough = false;
         uint32_t generated_ts_used = 0;
         nonce_from_additional_data = fill_nonce_from_additional_data(
             additional_data.data(),
             additional_data.size(),
             nonce,
-            &additional_was_rtp_header);
+            &additional_was_rtp_header,
+            nullptr,
+            &additional_used_relative_ts);
         if (!nonce_from_additional_data) {
             nonce_from_rtp_header =
                 fill_nonce_from_rtp_frame(encrypted_frame.data(), encrypted_frame.size(), &rtp_probe_, nonce, &header_size);
@@ -503,6 +735,8 @@ public:
                 nonce_from_generated_ts = true;
                 generated_ts_used = generated_ts_.next_timestamp;
                 fill_nonce_from_ts32(generated_ts_used, nonce);
+                diag_event("DEC fallback=generated-ts mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu gen_ts=%u\n",
+                           media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), generated_ts_used);
             }
         }
 
@@ -527,6 +761,53 @@ public:
             rosetta_xchacha20_xor(frame.data(), encrypted_frame.data(), encrypted_frame.size(), nonce, key_);
         }
 
+        if (nonce_from_additional_data) {
+            bool plausible = is_plausible_decrypted_audio_frame(frame.data(), encrypted_frame.size());
+
+            // Fallback for Android pipelines where additional_data timestamps are
+            // stream-relative while remote side uses a different absolute base.
+            if (!plausible && additional_data.size() == 8 && !additional_was_rtp_header) {
+                uint8_t nonce_rel[24] = {0};
+                bool rel_used = false;
+                if (fill_nonce_from_additional_data(
+                        additional_data.data(),
+                        additional_data.size(),
+                        nonce_rel,
+                        nullptr,
+                        &additional_rel_ts_state_,
+                        &rel_used) &&
+                    rel_used) {
+                    std::vector<uint8_t> candidate(encrypted_frame.size());
+                    rosetta_xchacha20_xor(
+                        candidate.data(),
+                        encrypted_frame.data(),
+                        encrypted_frame.size(),
+                        nonce_rel,
+                        key_);
+                    if (is_plausible_decrypted_audio_frame(candidate.data(), candidate.size())) {
+                        memcpy(frame.data(), candidate.data(), candidate.size());
+                        memcpy(nonce, nonce_rel, sizeof(nonce));
+                        plausible = true;
+                        used_additional_relative_fallback = true;
+                        additional_used_relative_ts = true;
+                        diag_event("DEC fallback=relative-ad-ts mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu nonce_ts=%u\n",
+                                   media_type_name(media_type), csrcs.size(), encrypted_frame.size(),
+                                   additional_data.size(), nonce_ts32(nonce));
+                    }
+                }
+            }
+
+            // If payload already looks like valid Opus, keep plaintext.
+            // This protects interop when peer stream is unexpectedly unencrypted.
+            if (!plausible &&
+                is_plausible_decrypted_audio_frame(encrypted_frame.data(), encrypted_frame.size())) {
+                memcpy(frame.data(), encrypted_frame.data(), encrypted_frame.size());
+                used_plain_passthrough = true;
+                diag_event("DEC fallback=plain-passthrough mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu\n",
+                           media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size());
+            }
+        }
+
         if (nonce_from_generated_ts) {
             bool plausible = is_plausible_opus_packet(frame.data(), encrypted_frame.size());
 
@@ -548,6 +829,9 @@ public:
                         generated_ts_used = ts_try;
                         used_generated_resync = true;
                         plausible = true;
+                        diag_event("DEC fallback=generated-resync mt=%s csrcs=%zu enc_sz=%zu ts_try=%u step=%u probe=%u\n",
+                                   media_type_name(media_type), csrcs.size(), encrypted_frame.size(),
+                                   ts_try, generated_ts_.next_step, i);
                         break;
                     }
                 }
@@ -558,25 +842,68 @@ public:
             generated_ts_.next_timestamp = generated_ts_used + step;
         }
 
-        // Diag: log first 3 frames
+        // Diag: log first frames with enough context for crash analysis.
         int n = diag_count_.fetch_add(1, std::memory_order_relaxed);
-        if (n < 3) {
+        if (n < kDiagFrameLimit) {
+            uint8_t ad_prefix[8] = {0};
+            const size_t ad_copy = additional_data.size() < sizeof(ad_prefix)
+                                       ? additional_data.size()
+                                       : sizeof(ad_prefix);
+            if (ad_copy > 0) memcpy(ad_prefix, additional_data.data(), ad_copy);
+            ParsedRtpPacket enc_rtp{};
+            ParsedRtpPacket dec_rtp{};
+            const bool has_enc_rtp = parse_rtp_packet(encrypted_frame.data(), encrypted_frame.size(), &enc_rtp);
+            const bool has_dec_rtp = parse_rtp_packet(frame.data(), encrypted_frame.size(), &dec_rtp);
+            const bool dec_plausible = is_plausible_decrypted_audio_frame(frame.data(), encrypted_frame.size());
+            const uint32_t enc_hash = fnv1a32(encrypted_frame.data(), encrypted_frame.size(), kFrameHashSampleBytes);
+            const uint32_t dec_hash = fnv1a32(frame.data(), encrypted_frame.size(), kFrameHashSampleBytes);
             const char* mode = nullptr;
             if (nonce_from_rtp_header) {
                 mode = "rtp";
             } else if (nonce_from_generated_ts) {
                 mode = used_generated_resync ? "gen-resync" : "gen";
             } else if (nonce_from_additional_data) {
-                mode = additional_was_rtp_header ? "ad-rtp" : "raw-abs";
+                if (used_plain_passthrough) mode = "raw-plain";
+                else if (additional_was_rtp_header) mode = "ad-rtp";
+                else if (used_additional_relative_fallback) mode = "raw-rel-fb";
+                else mode = additional_used_relative_ts ? "raw-rel" : "raw-abs";
             } else {
                 mode = "raw-abs";
             }
-            LOGI("DEC frame#%d: enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce=%02x%02x%02x%02x",
-                 n, encrypted_frame.size(), additional_data.size(), header_size, mode,
-                 nonce[4], nonce[5], nonce[6], nonce[7]);
-            diag_write("DEC frame#%d: enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce[4..7]=%02x%02x%02x%02x\n",
-                       n, encrypted_frame.size(), additional_data.size(), header_size, mode,
-                       nonce[4], nonce[5], nonce[6], nonce[7]);
+            uint32_t bad_streak = 0;
+            if (!dec_plausible) {
+                bad_streak = bad_audio_streak_.fetch_add(1, std::memory_order_relaxed) + 1;
+                if (bad_streak == 1 || bad_streak == 3 || bad_streak == 10 || (bad_streak % 50) == 0) {
+                    diag_event("DEC degraded mt=%s csrcs=%zu mode=%s bad_streak=%u nonce_ts=%u key_fp=%08x\n",
+                               media_type_name(media_type), csrcs.size(), mode, bad_streak,
+                               nonce_ts32(nonce), key_fingerprint_);
+                }
+            } else {
+                const uint32_t prev_bad = bad_audio_streak_.exchange(0, std::memory_order_relaxed);
+                if (prev_bad >= 3) {
+                    diag_event("DEC recovered mt=%s csrcs=%zu mode=%s prev_bad_streak=%u nonce_ts=%u key_fp=%08x\n",
+                               media_type_name(media_type), csrcs.size(), mode, prev_bad,
+                               nonce_ts32(nonce), key_fingerprint_);
+                }
+            }
+            LOGI("DEC frame#%d mt=%s csrcs=%zu enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u dec_ok=%d bad_streak=%u enc_rtp=%d enc_seq=%u enc_ts=%u enc_ssrc=%u dec_rtp=%d dec_seq=%u dec_ts=%u dec_ssrc=%u key_fp=%08x enc_h=%08x dec_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x",
+                 n, media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), header_size, mode,
+                 nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
+                 dec_plausible ? 1 : 0, bad_streak,
+                 has_enc_rtp ? 1 : 0, has_enc_rtp ? enc_rtp.sequence : 0, has_enc_rtp ? enc_rtp.timestamp : 0, has_enc_rtp ? enc_rtp.ssrc : 0,
+                 has_dec_rtp ? 1 : 0, has_dec_rtp ? dec_rtp.sequence : 0, has_dec_rtp ? dec_rtp.timestamp : 0, has_dec_rtp ? dec_rtp.ssrc : 0,
+                 key_fingerprint_, enc_hash, dec_hash,
+                 ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
+                 ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
+            diag_write("DEC frame#%d mt=%s csrcs=%zu enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u dec_ok=%d bad_streak=%u enc_rtp=%d enc_seq=%u enc_ts=%u enc_ssrc=%u dec_rtp=%d dec_seq=%u dec_ts=%u dec_ssrc=%u key_fp=%08x enc_h=%08x dec_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x\n",
+                       n, media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), header_size, mode,
+                       nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
+                       dec_plausible ? 1 : 0, bad_streak,
+                       has_enc_rtp ? 1 : 0, has_enc_rtp ? enc_rtp.sequence : 0, has_enc_rtp ? enc_rtp.timestamp : 0, has_enc_rtp ? enc_rtp.ssrc : 0,
+                       has_dec_rtp ? 1 : 0, has_dec_rtp ? dec_rtp.sequence : 0, has_dec_rtp ? dec_rtp.timestamp : 0, has_dec_rtp ? dec_rtp.ssrc : 0,
+                       key_fingerprint_, enc_hash, dec_hash,
+                       ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
+                       ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
         }
 
         return {Result::Status::kOk, encrypted_frame.size()};
@@ -587,13 +914,20 @@ public:
     }
 
 protected:
-    ~XChaCha20Decryptor() override { memset(key_, 0, 32); }
+    ~XChaCha20Decryptor() override {
+        diag_event("DEC destroy ptr=%p key_fp=%08x bad_streak=%u\n",
+                   this, key_fingerprint_, bad_audio_streak_.load(std::memory_order_relaxed));
+        memset(key_, 0, 32);
+    }
 
 private:
     mutable std::atomic<int> ref_{0};
     mutable std::atomic<int> diag_count_{0};
+    mutable std::atomic<uint32_t> bad_audio_streak_{0};
     mutable RtpProbeState rtp_probe_;
     mutable GeneratedTsState generated_ts_;
+    mutable AdditionalTsState additional_rel_ts_state_;
+    uint32_t key_fingerprint_ = 0;
     uint8_t key_[32];
 };
 
@@ -633,9 +967,14 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateEncryptor(
         JNIEnv *env, jclass, jbyteArray jKey)
 {
     jsize len = env->GetArrayLength(jKey);
-    if (len < 32) return 0;
+    if (len < 32) {
+        LOGE("Create encryptor failed: key length=%d (<32)", (int)len);
+        diag_event("ENC create-failed key_len=%d\n", (int)len);
+        return 0;
+    }
 
     auto *key = (uint8_t *)env->GetByteArrayElements(jKey, nullptr);
+    const uint32_t key_fp = key_fingerprint32(key);
     auto *enc = new XChaCha20Encryptor(key);
     env->ReleaseByteArrayElements(jKey, (jbyte *)key, JNI_ABORT);
 
@@ -643,7 +982,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateEncryptor(
     // WebRTC's scoped_refptr will AddRef again when it takes ownership.
     enc->AddRef();
 
-    LOGI("Created XChaCha20 encryptor %p", enc);
+    LOGI("Created XChaCha20 encryptor %p key_fp=%08x", enc, key_fp);
+    diag_event("ENC created ptr=%p key_fp=%08x key_len=%d\n", enc, key_fp, (int)len);
     return reinterpret_cast<jlong>(enc);
 }
 
@@ -653,6 +993,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeReleaseEncryptor(
 {
     if (ptr == 0) return;
     auto *enc = reinterpret_cast<XChaCha20Encryptor *>(ptr);
+    LOGI("Release XChaCha20 encryptor %p key_fp=%08x", enc, enc->KeyFingerprint());
+    diag_event("ENC release ptr=%p key_fp=%08x\n", enc, enc->KeyFingerprint());
     enc->Release();
 }
 
@@ -663,15 +1005,21 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateDecryptor(
         JNIEnv *env, jclass, jbyteArray jKey)
 {
     jsize len = env->GetArrayLength(jKey);
-    if (len < 32) return 0;
+    if (len < 32) {
+        LOGE("Create decryptor failed: key length=%d (<32)", (int)len);
+        diag_event("DEC create-failed key_len=%d\n", (int)len);
+        return 0;
+    }
 
     auto *key = (uint8_t *)env->GetByteArrayElements(jKey, nullptr);
+    const uint32_t key_fp = key_fingerprint32(key);
     auto *dec = new XChaCha20Decryptor(key);
     env->ReleaseByteArrayElements(jKey, (jbyte *)key, JNI_ABORT);
 
     dec->AddRef();
 
-    LOGI("Created XChaCha20 decryptor %p", dec);
+    LOGI("Created XChaCha20 decryptor %p key_fp=%08x", dec, key_fp);
+    diag_event("DEC created ptr=%p key_fp=%08x key_len=%d\n", dec, key_fp, (int)len);
     return reinterpret_cast<jlong>(dec);
 }
 
@@ -681,6 +1029,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeReleaseDecryptor(
 {
     if (ptr == 0) return;
     auto *dec = reinterpret_cast<XChaCha20Decryptor *>(ptr);
+    LOGI("Release XChaCha20 decryptor %p key_fp=%08x", dec, dec->KeyFingerprint());
+    diag_event("DEC release ptr=%p key_fp=%08x\n", dec, dec->KeyFingerprint());
     dec->Release();
 }
 
@@ -712,6 +1062,7 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeOpenDiagFile(
         JNIEnv *env, jclass, jstring jPath)
 {
     if (g_diag_fd >= 0) { close(g_diag_fd); g_diag_fd = -1; }
+    g_diag_event_count.store(0, std::memory_order_relaxed);
 
     const char *path = env->GetStringUTFChars(jPath, nullptr);
     strncpy(g_diag_path, path, sizeof(g_diag_path) - 1);
@@ -719,8 +1070,9 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeOpenDiagFile(
     env->ReleaseStringUTFChars(jPath, path);
 
     if (g_diag_fd >= 0) {
-        diag_write("=== E2EE DIAGNOSTICS ===\n");
+        diag_write("=== E2EE DIAGNOSTICS pid=%d ===\n", (int)getpid());
         LOGI("Diag file opened: %s", g_diag_path);
+        diag_event("DIAG open path=%s\n", g_diag_path);
     } else {
         LOGE("Failed to open diag file: %s", g_diag_path);
     }
@@ -731,6 +1083,7 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCloseDiagFile(
         JNIEnv *, jclass)
 {
     if (g_diag_fd >= 0) {
+        diag_event("DIAG close path=%s\n", g_diag_path);
         diag_write("=== END ===\n");
         close(g_diag_fd);
         g_diag_fd = -1;