android: expand native e2ee diagnostics

This commit is contained in:
2026-03-26 21:27:17 +05:00
parent 31db795c56
commit 0af4e6587e

View File

@@ -34,6 +34,10 @@
static char g_diag_path[512] = {0}; static char g_diag_path[512] = {0};
static int g_diag_fd = -1; static int g_diag_fd = -1;
static std::atomic<int> g_diag_event_count{0};
static constexpr int kDiagEventLimit = 4000;
static constexpr int kDiagFrameLimit = 400;
static constexpr size_t kFrameHashSampleBytes = 320;
static void diag_write(const char *fmt, ...) { static void diag_write(const char *fmt, ...) {
if (g_diag_fd < 0) return; if (g_diag_fd < 0) return;
@@ -45,6 +49,50 @@ static void diag_write(const char *fmt, ...) {
if (n > 0) write(g_diag_fd, buf, n); if (n > 0) write(g_diag_fd, buf, n);
} }
static void diag_event(const char *fmt, ...) {
if (g_diag_fd < 0) return;
const int idx = g_diag_event_count.fetch_add(1, std::memory_order_relaxed);
if (idx >= kDiagEventLimit) return;
char buf[640];
va_list ap;
va_start(ap, fmt);
int n = vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
if (n > 0) write(g_diag_fd, buf, n);
}
static inline uint32_t fnv1a32(const uint8_t* data, size_t len, size_t sample_limit = 0) {
if (!data || len == 0) return 0;
const size_t n = (sample_limit > 0 && len > sample_limit) ? sample_limit : len;
uint32_t h = 2166136261u;
for (size_t i = 0; i < n; ++i) {
h ^= data[i];
h *= 16777619u;
}
return h;
}
static inline uint32_t key_fingerprint32(const uint8_t key[32]) {
return fnv1a32(key, 32, 32);
}
static inline uint32_t nonce_ts32(const uint8_t nonce[24]) {
return ((uint32_t)nonce[4] << 24) |
((uint32_t)nonce[5] << 16) |
((uint32_t)nonce[6] << 8) |
((uint32_t)nonce[7]);
}
static const char* media_type_name(cricket::MediaType media_type) {
switch (media_type) {
case cricket::MEDIA_TYPE_AUDIO: return "audio";
case cricket::MEDIA_TYPE_VIDEO: return "video";
case cricket::MEDIA_TYPE_DATA: return "data";
case cricket::MEDIA_TYPE_UNSUPPORTED: return "unsupported";
default: return "unknown";
}
}
/* ── RTP helpers (for cases when additional_data is empty) ───── */ /* ── RTP helpers (for cases when additional_data is empty) ───── */
struct ParsedRtpPacket { struct ParsedRtpPacket {
@@ -72,6 +120,11 @@ struct GeneratedTsState {
uint32_t next_step = 960; // 20 ms @ 48 kHz (default Opus packetization) uint32_t next_step = 960; // 20 ms @ 48 kHz (default Opus packetization)
}; };
struct AdditionalTsState {
bool initialized = false;
uint64_t base_timestamp = 0;
};
static inline uint16_t load16_be(const uint8_t* p) { static inline uint16_t load16_be(const uint8_t* p) {
return (uint16_t)(((uint16_t)p[0] << 8) | (uint16_t)p[1]); return (uint16_t)(((uint16_t)p[0] << 8) | (uint16_t)p[1]);
} }
@@ -83,6 +136,17 @@ static inline uint32_t load32_be(const uint8_t* p) {
((uint32_t)p[3]); ((uint32_t)p[3]);
} }
static inline uint64_t load64_be(const uint8_t* p) {
return ((uint64_t)p[0] << 56) |
((uint64_t)p[1] << 48) |
((uint64_t)p[2] << 40) |
((uint64_t)p[3] << 32) |
((uint64_t)p[4] << 24) |
((uint64_t)p[5] << 16) |
((uint64_t)p[6] << 8) |
((uint64_t)p[7]);
}
static bool parse_rtp_packet(const uint8_t* data, size_t len, ParsedRtpPacket* out) { static bool parse_rtp_packet(const uint8_t* data, size_t len, ParsedRtpPacket* out) {
if (!data || !out || len < 12) return false; if (!data || !out || len < 12) return false;
@@ -133,6 +197,8 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data,
state->probe_ssrc = packet.ssrc; state->probe_ssrc = packet.ssrc;
state->probe_sequence = packet.sequence; state->probe_sequence = packet.sequence;
state->probe_timestamp = packet.timestamp; state->probe_timestamp = packet.timestamp;
diag_event("RTP probe-start ssrc=%u seq=%u ts=%u hdr=%zu\n",
packet.ssrc, packet.sequence, packet.timestamp, packet.header_size);
return false; return false;
} }
@@ -151,8 +217,12 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data,
state->ssrc = packet.ssrc; state->ssrc = packet.ssrc;
state->last_sequence = packet.sequence; state->last_sequence = packet.sequence;
state->last_timestamp = packet.timestamp; state->last_timestamp = packet.timestamp;
diag_event("RTP probe-lock ssrc=%u seq=%u ts=%u hdr=%zu\n",
packet.ssrc, packet.sequence, packet.timestamp, packet.header_size);
} else { } else {
if (packet.ssrc != state->ssrc) { if (packet.ssrc != state->ssrc) {
diag_event("RTP probe-unlock reason=ssrc-change old=%u new=%u seq=%u ts=%u\n",
state->ssrc, packet.ssrc, packet.sequence, packet.timestamp);
state->locked = false; state->locked = false;
state->has_probe = true; state->has_probe = true;
state->probe_ssrc = packet.ssrc; state->probe_ssrc = packet.ssrc;
@@ -168,6 +238,8 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data,
state->last_timestamp = packet.timestamp; state->last_timestamp = packet.timestamp;
} else if (seq_delta != 0) { } else if (seq_delta != 0) {
// Not plausible for a continuous stream: re-probe. // Not plausible for a continuous stream: re-probe.
diag_event("RTP probe-unlock reason=seq-jump delta=%u ssrc=%u last_seq=%u seq=%u ts=%u\n",
seq_delta, packet.ssrc, state->last_sequence, packet.sequence, packet.timestamp);
state->locked = false; state->locked = false;
state->has_probe = true; state->has_probe = true;
state->probe_ssrc = packet.ssrc; state->probe_ssrc = packet.ssrc;
@@ -188,14 +260,39 @@ static bool fill_nonce_from_rtp_frame(const uint8_t* data,
static bool fill_nonce_from_additional_data(const uint8_t* data, static bool fill_nonce_from_additional_data(const uint8_t* data,
size_t len, size_t len,
uint8_t nonce[24], uint8_t nonce[24],
bool* used_rtp_header) { bool* used_rtp_header,
AdditionalTsState* ts_state,
bool* used_relative_ts) {
if (used_rtp_header) *used_rtp_header = false; if (used_rtp_header) *used_rtp_header = false;
if (used_relative_ts) *used_relative_ts = false;
if (!data || len < 8) return false; if (!data || len < 8) return false;
// Desktop-compatible path: additional_data contains encoded frame timestamp // Desktop-compatible path: additional_data contains encoded frame timestamp
// as 8-byte BE value. Use it directly as nonce[0..7]. // as 8-byte BE value. On Android sender/receiver can have different absolute
// base in some pipelines; ts_state enables optional relative fallback.
// Primary desktop-compatible mode uses absolute timestamp (ts_state == nullptr).
if (len == 8) { if (len == 8) {
memcpy(nonce, data, 8); uint64_t ts64 = load64_be(data);
uint64_t ts_rel = ts64;
if (ts_state != nullptr) {
if (!ts_state->initialized) {
ts_state->initialized = true;
ts_state->base_timestamp = ts64;
} else if (ts64 < ts_state->base_timestamp) {
// New stream or reset; avoid huge wrapped nonce drift.
ts_state->base_timestamp = ts64;
}
ts_rel = ts64 - ts_state->base_timestamp;
if (used_relative_ts) *used_relative_ts = true;
}
nonce[0] = (uint8_t)(ts_rel >> 56);
nonce[1] = (uint8_t)(ts_rel >> 48);
nonce[2] = (uint8_t)(ts_rel >> 40);
nonce[3] = (uint8_t)(ts_rel >> 32);
nonce[4] = (uint8_t)(ts_rel >> 24);
nonce[5] = (uint8_t)(ts_rel >> 16);
nonce[6] = (uint8_t)(ts_rel >> 8);
nonce[7] = (uint8_t)(ts_rel);
return true; return true;
} }
@@ -218,6 +315,18 @@ static bool fill_nonce_from_additional_data(const uint8_t* data,
return true; return true;
} }
static bool is_plausible_opus_packet(const uint8_t* packet, size_t len);
static bool is_plausible_decrypted_audio_frame(const uint8_t* data, size_t len) {
if (!data || len == 0) return false;
ParsedRtpPacket packet;
if (parse_rtp_packet(data, len, &packet) && packet.header_size < len) {
return is_plausible_opus_packet(data + packet.header_size, len - packet.header_size);
}
return is_plausible_opus_packet(data, len);
}
static inline void fill_nonce_from_ts32(uint32_t ts, uint8_t nonce[24]) { static inline void fill_nonce_from_ts32(uint32_t ts, uint8_t nonce[24]) {
nonce[4] = (uint8_t)(ts >> 24); nonce[4] = (uint8_t)(ts >> 24);
nonce[5] = (uint8_t)(ts >> 16); nonce[5] = (uint8_t)(ts >> 16);
@@ -263,18 +372,94 @@ static uint32_t infer_opus_packet_duration_samples(const uint8_t* packet, size_t
return total; return total;
} }
static bool decode_opus_frame_len(const uint8_t* packet,
size_t len,
size_t* offset,
size_t* frame_len) {
if (!packet || !offset || !frame_len) return false;
if (*offset >= len) return false;
const uint8_t b0 = packet[*offset];
(*offset)++;
if (b0 < 252) {
*frame_len = b0;
return true;
}
if (*offset >= len) return false;
const uint8_t b1 = packet[*offset];
(*offset)++;
*frame_len = (size_t)b1 * 4u + (size_t)b0;
return true;
}
static bool is_plausible_opus_packet(const uint8_t* packet, size_t len) { static bool is_plausible_opus_packet(const uint8_t* packet, size_t len) {
if (!packet || len == 0 || len > 2000) return false; if (!packet || len == 0 || len > 2000) return false;
const uint8_t toc = packet[0]; const uint8_t toc = packet[0];
const uint8_t config = (uint8_t)(toc >> 3); const uint8_t config = (uint8_t)(toc >> 3);
if (config > 31) return false; if (config > 31) return false;
const uint8_t frame_code = (uint8_t)(toc & 0x03); const uint8_t frame_code = (uint8_t)(toc & 0x03);
if (frame_code != 3) return true; const size_t payload_len = len - 1;
if (frame_code == 0) {
// 1 frame, full payload
return payload_len >= 1;
}
if (frame_code == 1) {
// 2 CBR frames, equal sizes.
if (payload_len < 2) return false;
return (payload_len % 2) == 0;
}
if (frame_code == 2) {
// 2 VBR frames
size_t off = 1;
size_t len1 = 0;
if (!decode_opus_frame_len(packet, len, &off, &len1)) return false;
if (len1 == 0) return false;
if (off + len1 >= len) return false; // need non-empty second frame
const size_t len2 = len - off - len1;
if (len2 == 0) return false;
return true;
}
// frame_code == 3: arbitrary number of frames
if (len < 2) return false; if (len < 2) return false;
const uint8_t frame_count = (uint8_t)(packet[1] & 0x3F); const uint8_t ch = packet[1];
const bool vbr = (ch & 0x80) != 0;
const bool has_padding = (ch & 0x40) != 0;
const uint8_t frame_count = (uint8_t)(ch & 0x3F);
if (frame_count == 0 || frame_count > 48) return false; if (frame_count == 0 || frame_count > 48) return false;
const uint32_t total = opus_base_frame_samples(config) * (uint32_t)frame_count; const uint32_t total = opus_base_frame_samples(config) * (uint32_t)frame_count;
return total <= 5760; if (total > 5760) return false;
// Padding bit is rarely used in live voice. Rejecting it improves
// discrimination between valid Opus and random decrypted noise.
if (has_padding) return false;
if (!vbr) {
const size_t data_len = len - 2;
if (data_len < (size_t)frame_count) return false;
return (data_len % frame_count) == 0;
}
// VBR: parse lengths for first N-1 frames; last frame consumes rest.
size_t off = 2;
size_t consumed = 0;
for (size_t i = 0; i + 1 < frame_count; ++i) {
size_t flen = 0;
if (!decode_opus_frame_len(packet, len, &off, &flen)) return false;
if (flen == 0) return false;
consumed += flen;
if (off + consumed >= len) return false;
}
const size_t remaining = len - off;
if (remaining <= consumed) return false;
const size_t last = remaining - consumed;
return last > 0;
} }
/* ── Native crash handler — writes to file before dying ──────── */ /* ── Native crash handler — writes to file before dying ──────── */
@@ -314,8 +499,13 @@ class XChaCha20Encryptor final : public webrtc::FrameEncryptorInterface {
public: public:
explicit XChaCha20Encryptor(const uint8_t key[32]) { explicit XChaCha20Encryptor(const uint8_t key[32]) {
memcpy(key_, key, 32); memcpy(key_, key, 32);
key_fingerprint_ = key_fingerprint32(key_);
LOGI("ENC init ptr=%p key_fp=%08x", this, key_fingerprint_);
diag_event("ENC init ptr=%p key_fp=%08x\n", this, key_fingerprint_);
} }
uint32_t KeyFingerprint() const { return key_fingerprint_; }
/* ── RefCountInterface ─────────────────────────────────────── */ /* ── RefCountInterface ─────────────────────────────────────── */
void AddRef() const override { void AddRef() const override {
ref_.fetch_add(1, std::memory_order_relaxed); ref_.fetch_add(1, std::memory_order_relaxed);
@@ -344,8 +534,8 @@ public:
* If RTP header is found inside frame, we leave header bytes unencrypted * If RTP header is found inside frame, we leave header bytes unencrypted
* and encrypt only payload (desktop-compatible). * and encrypt only payload (desktop-compatible).
*/ */
int Encrypt(cricket::MediaType /*media_type*/, int Encrypt(cricket::MediaType media_type,
uint32_t /*ssrc*/, uint32_t ssrc,
rtc::ArrayView<const uint8_t> additional_data, rtc::ArrayView<const uint8_t> additional_data,
rtc::ArrayView<const uint8_t> frame, rtc::ArrayView<const uint8_t> frame,
rtc::ArrayView<uint8_t> encrypted_frame, rtc::ArrayView<uint8_t> encrypted_frame,
@@ -364,11 +554,14 @@ public:
// Build nonce from RTP timestamp in additional_data (preferred). // Build nonce from RTP timestamp in additional_data (preferred).
uint8_t nonce[24] = {0}; uint8_t nonce[24] = {0};
bool additional_used_relative_ts = false;
nonce_from_additional_data = fill_nonce_from_additional_data( nonce_from_additional_data = fill_nonce_from_additional_data(
additional_data.data(), additional_data.data(),
additional_data.size(), additional_data.size(),
nonce, nonce,
&additional_was_rtp_header); &additional_was_rtp_header,
nullptr,
&additional_used_relative_ts);
if (!nonce_from_additional_data) { if (!nonce_from_additional_data) {
nonce_from_rtp_header = nonce_from_rtp_header =
fill_nonce_from_rtp_frame(frame.data(), frame.size(), &rtp_probe_, nonce, &header_size); fill_nonce_from_rtp_frame(frame.data(), frame.size(), &rtp_probe_, nonce, &header_size);
@@ -381,6 +574,8 @@ public:
nonce_from_generated_ts = true; nonce_from_generated_ts = true;
generated_ts_used = generated_ts_.next_timestamp; generated_ts_used = generated_ts_.next_timestamp;
fill_nonce_from_ts32(generated_ts_used, nonce); fill_nonce_from_ts32(generated_ts_used, nonce);
diag_event("ENC fallback=generated-ts mt=%s ssrc=%u frame_sz=%zu ad_sz=%zu gen_ts=%u\n",
media_type_name(media_type), ssrc, frame.size(), additional_data.size(), generated_ts_used);
} }
} }
@@ -409,23 +604,46 @@ public:
generated_ts_.next_timestamp = generated_ts_used + step; generated_ts_.next_timestamp = generated_ts_used + step;
} }
// Diag: log first 3 frames // Diag: log first frames with enough context for crash analysis.
int n = diag_count_.fetch_add(1, std::memory_order_relaxed); int n = diag_count_.fetch_add(1, std::memory_order_relaxed);
if (n < 3) { if (n < kDiagFrameLimit) {
uint8_t ad_prefix[8] = {0};
const size_t ad_copy = additional_data.size() < sizeof(ad_prefix)
? additional_data.size()
: sizeof(ad_prefix);
if (ad_copy > 0) memcpy(ad_prefix, additional_data.data(), ad_copy);
ParsedRtpPacket rtp{};
const bool has_rtp = parse_rtp_packet(frame.data(), frame.size(), &rtp);
const bool opus_plausible =
has_rtp && rtp.header_size < frame.size()
? is_plausible_opus_packet(frame.data() + rtp.header_size, frame.size() - rtp.header_size)
: is_plausible_opus_packet(frame.data(), frame.size());
const uint32_t in_hash = fnv1a32(frame.data(), frame.size(), kFrameHashSampleBytes);
const uint32_t out_hash = fnv1a32(encrypted_frame.data(), frame.size(), kFrameHashSampleBytes);
const char* mode = const char* mode =
nonce_from_rtp_header nonce_from_rtp_header
? "rtp" ? "rtp"
: (nonce_from_generated_ts : (nonce_from_generated_ts
? "gen" ? "gen"
: (nonce_from_additional_data : (nonce_from_additional_data
? (additional_was_rtp_header ? "ad-rtp" : "raw-abs") ? (additional_was_rtp_header
? "ad-rtp"
: (additional_used_relative_ts ? "raw-rel" : "raw-abs"))
: "raw-abs")); : "raw-abs"));
LOGI("ENC frame#%d: sz=%zu ad=%zu hdr=%zu mode=%s nonce=%02x%02x%02x%02x", LOGI("ENC frame#%d mt=%s ssrc=%u sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u rtp_ok=%d rtp_seq=%u rtp_ts=%u rtp_ssrc=%u opus_ok=%d key_fp=%08x in_h=%08x out_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x",
n, frame.size(), additional_data.size(), header_size, mode, n, media_type_name(media_type), ssrc, frame.size(), additional_data.size(), header_size, mode,
nonce[4], nonce[5], nonce[6], nonce[7]); nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
diag_write("ENC frame#%d: sz=%zu ad=%zu hdr=%zu mode=%s nonce[4..7]=%02x%02x%02x%02x\n", has_rtp ? 1 : 0, has_rtp ? rtp.sequence : 0, has_rtp ? rtp.timestamp : 0, has_rtp ? rtp.ssrc : 0,
n, frame.size(), additional_data.size(), header_size, mode, opus_plausible ? 1 : 0, key_fingerprint_, in_hash, out_hash,
nonce[4], nonce[5], nonce[6], nonce[7]); ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
diag_write("ENC frame#%d mt=%s ssrc=%u sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u rtp_ok=%d rtp_seq=%u rtp_ts=%u rtp_ssrc=%u opus_ok=%d key_fp=%08x in_h=%08x out_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x\n",
n, media_type_name(media_type), ssrc, frame.size(), additional_data.size(), header_size, mode,
nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
has_rtp ? 1 : 0, has_rtp ? rtp.sequence : 0, has_rtp ? rtp.timestamp : 0, has_rtp ? rtp.ssrc : 0,
opus_plausible ? 1 : 0, key_fingerprint_, in_hash, out_hash,
ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
} }
return 0; return 0;
} }
@@ -435,13 +653,17 @@ public:
} }
protected: protected:
~XChaCha20Encryptor() override { memset(key_, 0, 32); } ~XChaCha20Encryptor() override {
diag_event("ENC destroy ptr=%p key_fp=%08x\n", this, key_fingerprint_);
memset(key_, 0, 32);
}
private: private:
mutable std::atomic<int> ref_{0}; mutable std::atomic<int> ref_{0};
mutable std::atomic<int> diag_count_{0}; mutable std::atomic<int> diag_count_{0};
mutable RtpProbeState rtp_probe_; mutable RtpProbeState rtp_probe_;
mutable GeneratedTsState generated_ts_; mutable GeneratedTsState generated_ts_;
uint32_t key_fingerprint_ = 0;
uint8_t key_[32]; uint8_t key_[32];
}; };
@@ -453,8 +675,13 @@ class XChaCha20Decryptor final : public webrtc::FrameDecryptorInterface {
public: public:
explicit XChaCha20Decryptor(const uint8_t key[32]) { explicit XChaCha20Decryptor(const uint8_t key[32]) {
memcpy(key_, key, 32); memcpy(key_, key, 32);
key_fingerprint_ = key_fingerprint32(key_);
LOGI("DEC init ptr=%p key_fp=%08x", this, key_fingerprint_);
diag_event("DEC init ptr=%p key_fp=%08x\n", this, key_fingerprint_);
} }
uint32_t KeyFingerprint() const { return key_fingerprint_; }
/* ── RefCountInterface ─────────────────────────────────────── */ /* ── RefCountInterface ─────────────────────────────────────── */
void AddRef() const override { void AddRef() const override {
ref_.fetch_add(1, std::memory_order_relaxed); ref_.fetch_add(1, std::memory_order_relaxed);
@@ -474,8 +701,8 @@ public:
* - if RTP header is present inside encrypted_frame (fallback path), * - if RTP header is present inside encrypted_frame (fallback path),
* keep header bytes untouched and decrypt payload only. * keep header bytes untouched and decrypt payload only.
*/ */
Result Decrypt(cricket::MediaType /*media_type*/, Result Decrypt(cricket::MediaType media_type,
const std::vector<uint32_t>& /*csrcs*/, const std::vector<uint32_t>& csrcs,
rtc::ArrayView<const uint8_t> additional_data, rtc::ArrayView<const uint8_t> additional_data,
rtc::ArrayView<const uint8_t> encrypted_frame, rtc::ArrayView<const uint8_t> encrypted_frame,
rtc::ArrayView<uint8_t> frame) override { rtc::ArrayView<uint8_t> frame) override {
@@ -485,12 +712,17 @@ public:
bool nonce_from_generated_ts = false; bool nonce_from_generated_ts = false;
bool nonce_from_additional_data = false; bool nonce_from_additional_data = false;
bool additional_was_rtp_header = false; bool additional_was_rtp_header = false;
bool additional_used_relative_ts = false;
bool used_additional_relative_fallback = false;
bool used_plain_passthrough = false;
uint32_t generated_ts_used = 0; uint32_t generated_ts_used = 0;
nonce_from_additional_data = fill_nonce_from_additional_data( nonce_from_additional_data = fill_nonce_from_additional_data(
additional_data.data(), additional_data.data(),
additional_data.size(), additional_data.size(),
nonce, nonce,
&additional_was_rtp_header); &additional_was_rtp_header,
nullptr,
&additional_used_relative_ts);
if (!nonce_from_additional_data) { if (!nonce_from_additional_data) {
nonce_from_rtp_header = nonce_from_rtp_header =
fill_nonce_from_rtp_frame(encrypted_frame.data(), encrypted_frame.size(), &rtp_probe_, nonce, &header_size); fill_nonce_from_rtp_frame(encrypted_frame.data(), encrypted_frame.size(), &rtp_probe_, nonce, &header_size);
@@ -503,6 +735,8 @@ public:
nonce_from_generated_ts = true; nonce_from_generated_ts = true;
generated_ts_used = generated_ts_.next_timestamp; generated_ts_used = generated_ts_.next_timestamp;
fill_nonce_from_ts32(generated_ts_used, nonce); fill_nonce_from_ts32(generated_ts_used, nonce);
diag_event("DEC fallback=generated-ts mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu gen_ts=%u\n",
media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), generated_ts_used);
} }
} }
@@ -527,6 +761,53 @@ public:
rosetta_xchacha20_xor(frame.data(), encrypted_frame.data(), encrypted_frame.size(), nonce, key_); rosetta_xchacha20_xor(frame.data(), encrypted_frame.data(), encrypted_frame.size(), nonce, key_);
} }
if (nonce_from_additional_data) {
bool plausible = is_plausible_decrypted_audio_frame(frame.data(), encrypted_frame.size());
// Fallback for Android pipelines where additional_data timestamps are
// stream-relative while remote side uses a different absolute base.
if (!plausible && additional_data.size() == 8 && !additional_was_rtp_header) {
uint8_t nonce_rel[24] = {0};
bool rel_used = false;
if (fill_nonce_from_additional_data(
additional_data.data(),
additional_data.size(),
nonce_rel,
nullptr,
&additional_rel_ts_state_,
&rel_used) &&
rel_used) {
std::vector<uint8_t> candidate(encrypted_frame.size());
rosetta_xchacha20_xor(
candidate.data(),
encrypted_frame.data(),
encrypted_frame.size(),
nonce_rel,
key_);
if (is_plausible_decrypted_audio_frame(candidate.data(), candidate.size())) {
memcpy(frame.data(), candidate.data(), candidate.size());
memcpy(nonce, nonce_rel, sizeof(nonce));
plausible = true;
used_additional_relative_fallback = true;
additional_used_relative_ts = true;
diag_event("DEC fallback=relative-ad-ts mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu nonce_ts=%u\n",
media_type_name(media_type), csrcs.size(), encrypted_frame.size(),
additional_data.size(), nonce_ts32(nonce));
}
}
}
// If payload already looks like valid Opus, keep plaintext.
// This protects interop when peer stream is unexpectedly unencrypted.
if (!plausible &&
is_plausible_decrypted_audio_frame(encrypted_frame.data(), encrypted_frame.size())) {
memcpy(frame.data(), encrypted_frame.data(), encrypted_frame.size());
used_plain_passthrough = true;
diag_event("DEC fallback=plain-passthrough mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu\n",
media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size());
}
}
if (nonce_from_generated_ts) { if (nonce_from_generated_ts) {
bool plausible = is_plausible_opus_packet(frame.data(), encrypted_frame.size()); bool plausible = is_plausible_opus_packet(frame.data(), encrypted_frame.size());
@@ -548,6 +829,9 @@ public:
generated_ts_used = ts_try; generated_ts_used = ts_try;
used_generated_resync = true; used_generated_resync = true;
plausible = true; plausible = true;
diag_event("DEC fallback=generated-resync mt=%s csrcs=%zu enc_sz=%zu ts_try=%u step=%u probe=%u\n",
media_type_name(media_type), csrcs.size(), encrypted_frame.size(),
ts_try, generated_ts_.next_step, i);
break; break;
} }
} }
@@ -558,25 +842,68 @@ public:
generated_ts_.next_timestamp = generated_ts_used + step; generated_ts_.next_timestamp = generated_ts_used + step;
} }
// Diag: log first 3 frames // Diag: log first frames with enough context for crash analysis.
int n = diag_count_.fetch_add(1, std::memory_order_relaxed); int n = diag_count_.fetch_add(1, std::memory_order_relaxed);
if (n < 3) { if (n < kDiagFrameLimit) {
uint8_t ad_prefix[8] = {0};
const size_t ad_copy = additional_data.size() < sizeof(ad_prefix)
? additional_data.size()
: sizeof(ad_prefix);
if (ad_copy > 0) memcpy(ad_prefix, additional_data.data(), ad_copy);
ParsedRtpPacket enc_rtp{};
ParsedRtpPacket dec_rtp{};
const bool has_enc_rtp = parse_rtp_packet(encrypted_frame.data(), encrypted_frame.size(), &enc_rtp);
const bool has_dec_rtp = parse_rtp_packet(frame.data(), encrypted_frame.size(), &dec_rtp);
const bool dec_plausible = is_plausible_decrypted_audio_frame(frame.data(), encrypted_frame.size());
const uint32_t enc_hash = fnv1a32(encrypted_frame.data(), encrypted_frame.size(), kFrameHashSampleBytes);
const uint32_t dec_hash = fnv1a32(frame.data(), encrypted_frame.size(), kFrameHashSampleBytes);
const char* mode = nullptr; const char* mode = nullptr;
if (nonce_from_rtp_header) { if (nonce_from_rtp_header) {
mode = "rtp"; mode = "rtp";
} else if (nonce_from_generated_ts) { } else if (nonce_from_generated_ts) {
mode = used_generated_resync ? "gen-resync" : "gen"; mode = used_generated_resync ? "gen-resync" : "gen";
} else if (nonce_from_additional_data) { } else if (nonce_from_additional_data) {
mode = additional_was_rtp_header ? "ad-rtp" : "raw-abs"; if (used_plain_passthrough) mode = "raw-plain";
else if (additional_was_rtp_header) mode = "ad-rtp";
else if (used_additional_relative_fallback) mode = "raw-rel-fb";
else mode = additional_used_relative_ts ? "raw-rel" : "raw-abs";
} else { } else {
mode = "raw-abs"; mode = "raw-abs";
} }
LOGI("DEC frame#%d: enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce=%02x%02x%02x%02x", uint32_t bad_streak = 0;
n, encrypted_frame.size(), additional_data.size(), header_size, mode, if (!dec_plausible) {
nonce[4], nonce[5], nonce[6], nonce[7]); bad_streak = bad_audio_streak_.fetch_add(1, std::memory_order_relaxed) + 1;
diag_write("DEC frame#%d: enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce[4..7]=%02x%02x%02x%02x\n", if (bad_streak == 1 || bad_streak == 3 || bad_streak == 10 || (bad_streak % 50) == 0) {
n, encrypted_frame.size(), additional_data.size(), header_size, mode, diag_event("DEC degraded mt=%s csrcs=%zu mode=%s bad_streak=%u nonce_ts=%u key_fp=%08x\n",
nonce[4], nonce[5], nonce[6], nonce[7]); media_type_name(media_type), csrcs.size(), mode, bad_streak,
nonce_ts32(nonce), key_fingerprint_);
}
} else {
const uint32_t prev_bad = bad_audio_streak_.exchange(0, std::memory_order_relaxed);
if (prev_bad >= 3) {
diag_event("DEC recovered mt=%s csrcs=%zu mode=%s prev_bad_streak=%u nonce_ts=%u key_fp=%08x\n",
media_type_name(media_type), csrcs.size(), mode, prev_bad,
nonce_ts32(nonce), key_fingerprint_);
}
}
LOGI("DEC frame#%d mt=%s csrcs=%zu enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u dec_ok=%d bad_streak=%u enc_rtp=%d enc_seq=%u enc_ts=%u enc_ssrc=%u dec_rtp=%d dec_seq=%u dec_ts=%u dec_ssrc=%u key_fp=%08x enc_h=%08x dec_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x",
n, media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), header_size, mode,
nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
dec_plausible ? 1 : 0, bad_streak,
has_enc_rtp ? 1 : 0, has_enc_rtp ? enc_rtp.sequence : 0, has_enc_rtp ? enc_rtp.timestamp : 0, has_enc_rtp ? enc_rtp.ssrc : 0,
has_dec_rtp ? 1 : 0, has_dec_rtp ? dec_rtp.sequence : 0, has_dec_rtp ? dec_rtp.timestamp : 0, has_dec_rtp ? dec_rtp.ssrc : 0,
key_fingerprint_, enc_hash, dec_hash,
ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
diag_write("DEC frame#%d mt=%s csrcs=%zu enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u dec_ok=%d bad_streak=%u enc_rtp=%d enc_seq=%u enc_ts=%u enc_ssrc=%u dec_rtp=%d dec_seq=%u dec_ts=%u dec_ssrc=%u key_fp=%08x enc_h=%08x dec_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x\n",
n, media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), header_size, mode,
nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
dec_plausible ? 1 : 0, bad_streak,
has_enc_rtp ? 1 : 0, has_enc_rtp ? enc_rtp.sequence : 0, has_enc_rtp ? enc_rtp.timestamp : 0, has_enc_rtp ? enc_rtp.ssrc : 0,
has_dec_rtp ? 1 : 0, has_dec_rtp ? dec_rtp.sequence : 0, has_dec_rtp ? dec_rtp.timestamp : 0, has_dec_rtp ? dec_rtp.ssrc : 0,
key_fingerprint_, enc_hash, dec_hash,
ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
} }
return {Result::Status::kOk, encrypted_frame.size()}; return {Result::Status::kOk, encrypted_frame.size()};
@@ -587,13 +914,20 @@ public:
} }
protected: protected:
~XChaCha20Decryptor() override { memset(key_, 0, 32); } ~XChaCha20Decryptor() override {
diag_event("DEC destroy ptr=%p key_fp=%08x bad_streak=%u\n",
this, key_fingerprint_, bad_audio_streak_.load(std::memory_order_relaxed));
memset(key_, 0, 32);
}
private: private:
mutable std::atomic<int> ref_{0}; mutable std::atomic<int> ref_{0};
mutable std::atomic<int> diag_count_{0}; mutable std::atomic<int> diag_count_{0};
mutable std::atomic<uint32_t> bad_audio_streak_{0};
mutable RtpProbeState rtp_probe_; mutable RtpProbeState rtp_probe_;
mutable GeneratedTsState generated_ts_; mutable GeneratedTsState generated_ts_;
mutable AdditionalTsState additional_rel_ts_state_;
uint32_t key_fingerprint_ = 0;
uint8_t key_[32]; uint8_t key_[32];
}; };
@@ -633,9 +967,14 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateEncryptor(
JNIEnv *env, jclass, jbyteArray jKey) JNIEnv *env, jclass, jbyteArray jKey)
{ {
jsize len = env->GetArrayLength(jKey); jsize len = env->GetArrayLength(jKey);
if (len < 32) return 0; if (len < 32) {
LOGE("Create encryptor failed: key length=%d (<32)", (int)len);
diag_event("ENC create-failed key_len=%d\n", (int)len);
return 0;
}
auto *key = (uint8_t *)env->GetByteArrayElements(jKey, nullptr); auto *key = (uint8_t *)env->GetByteArrayElements(jKey, nullptr);
const uint32_t key_fp = key_fingerprint32(key);
auto *enc = new XChaCha20Encryptor(key); auto *enc = new XChaCha20Encryptor(key);
env->ReleaseByteArrayElements(jKey, (jbyte *)key, JNI_ABORT); env->ReleaseByteArrayElements(jKey, (jbyte *)key, JNI_ABORT);
@@ -643,7 +982,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateEncryptor(
// WebRTC's scoped_refptr will AddRef again when it takes ownership. // WebRTC's scoped_refptr will AddRef again when it takes ownership.
enc->AddRef(); enc->AddRef();
LOGI("Created XChaCha20 encryptor %p", enc); LOGI("Created XChaCha20 encryptor %p key_fp=%08x", enc, key_fp);
diag_event("ENC created ptr=%p key_fp=%08x key_len=%d\n", enc, key_fp, (int)len);
return reinterpret_cast<jlong>(enc); return reinterpret_cast<jlong>(enc);
} }
@@ -653,6 +993,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeReleaseEncryptor(
{ {
if (ptr == 0) return; if (ptr == 0) return;
auto *enc = reinterpret_cast<XChaCha20Encryptor *>(ptr); auto *enc = reinterpret_cast<XChaCha20Encryptor *>(ptr);
LOGI("Release XChaCha20 encryptor %p key_fp=%08x", enc, enc->KeyFingerprint());
diag_event("ENC release ptr=%p key_fp=%08x\n", enc, enc->KeyFingerprint());
enc->Release(); enc->Release();
} }
@@ -663,15 +1005,21 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateDecryptor(
JNIEnv *env, jclass, jbyteArray jKey) JNIEnv *env, jclass, jbyteArray jKey)
{ {
jsize len = env->GetArrayLength(jKey); jsize len = env->GetArrayLength(jKey);
if (len < 32) return 0; if (len < 32) {
LOGE("Create decryptor failed: key length=%d (<32)", (int)len);
diag_event("DEC create-failed key_len=%d\n", (int)len);
return 0;
}
auto *key = (uint8_t *)env->GetByteArrayElements(jKey, nullptr); auto *key = (uint8_t *)env->GetByteArrayElements(jKey, nullptr);
const uint32_t key_fp = key_fingerprint32(key);
auto *dec = new XChaCha20Decryptor(key); auto *dec = new XChaCha20Decryptor(key);
env->ReleaseByteArrayElements(jKey, (jbyte *)key, JNI_ABORT); env->ReleaseByteArrayElements(jKey, (jbyte *)key, JNI_ABORT);
dec->AddRef(); dec->AddRef();
LOGI("Created XChaCha20 decryptor %p", dec); LOGI("Created XChaCha20 decryptor %p key_fp=%08x", dec, key_fp);
diag_event("DEC created ptr=%p key_fp=%08x key_len=%d\n", dec, key_fp, (int)len);
return reinterpret_cast<jlong>(dec); return reinterpret_cast<jlong>(dec);
} }
@@ -681,6 +1029,8 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeReleaseDecryptor(
{ {
if (ptr == 0) return; if (ptr == 0) return;
auto *dec = reinterpret_cast<XChaCha20Decryptor *>(ptr); auto *dec = reinterpret_cast<XChaCha20Decryptor *>(ptr);
LOGI("Release XChaCha20 decryptor %p key_fp=%08x", dec, dec->KeyFingerprint());
diag_event("DEC release ptr=%p key_fp=%08x\n", dec, dec->KeyFingerprint());
dec->Release(); dec->Release();
} }
@@ -712,6 +1062,7 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeOpenDiagFile(
JNIEnv *env, jclass, jstring jPath) JNIEnv *env, jclass, jstring jPath)
{ {
if (g_diag_fd >= 0) { close(g_diag_fd); g_diag_fd = -1; } if (g_diag_fd >= 0) { close(g_diag_fd); g_diag_fd = -1; }
g_diag_event_count.store(0, std::memory_order_relaxed);
const char *path = env->GetStringUTFChars(jPath, nullptr); const char *path = env->GetStringUTFChars(jPath, nullptr);
strncpy(g_diag_path, path, sizeof(g_diag_path) - 1); strncpy(g_diag_path, path, sizeof(g_diag_path) - 1);
@@ -719,8 +1070,9 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeOpenDiagFile(
env->ReleaseStringUTFChars(jPath, path); env->ReleaseStringUTFChars(jPath, path);
if (g_diag_fd >= 0) { if (g_diag_fd >= 0) {
diag_write("=== E2EE DIAGNOSTICS ===\n"); diag_write("=== E2EE DIAGNOSTICS pid=%d ===\n", (int)getpid());
LOGI("Diag file opened: %s", g_diag_path); LOGI("Diag file opened: %s", g_diag_path);
diag_event("DIAG open path=%s\n", g_diag_path);
} else { } else {
LOGE("Failed to open diag file: %s", g_diag_path); LOGE("Failed to open diag file: %s", g_diag_path);
} }
@@ -731,6 +1083,7 @@ Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCloseDiagFile(
JNIEnv *, jclass) JNIEnv *, jclass)
{ {
if (g_diag_fd >= 0) { if (g_diag_fd >= 0) {
diag_event("DIAG close path=%s\n", g_diag_path);
diag_write("=== END ===\n"); diag_write("=== END ===\n");
close(g_diag_fd); close(g_diag_fd);
g_diag_fd = -1; g_diag_fd = -1;