All checks were successful
Android Kernel Build / build (push) Successful in 19m17s
- Enable diag file for all builds (was DEBUG-only) - Add native frame count + bad streak query methods (JNI) - Add periodic E2EE-HEALTH log with enc/dec frame counts - Reduce scan receivers spam (only log on state change) - Log E2EE state on call connected - Log when attachSender/attachReceiver skips due to missing key
1157 lines
48 KiB
C++
1157 lines
48 KiB
C++
/**
|
|
* JNI bridge for Rosetta E2EE.
|
|
*
|
|
* Provides:
|
|
* 1. HSalsa20 — for nacl.box.before() compatible key derivation
|
|
* 2. XChaCha20 FrameEncryptor / FrameDecryptor — inherits DIRECTLY from
|
|
* webrtc::FrameEncryptorInterface / FrameDecryptorInterface so the
|
|
* vtable is generated by the compiler, not guessed by us.
|
|
*/
|
|
|
|
#include <jni.h>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <atomic>
|
|
#include <vector>
|
|
#include <signal.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <time.h>
|
|
#include <stdarg.h>
|
|
#include <stdio.h>
|
|
#include <android/log.h>
|
|
|
|
/* WebRTC M125 interface stubs (exact copies of the real headers) */
|
|
#include "webrtc/api/crypto/frame_encryptor_interface.h"
|
|
#include "webrtc/api/crypto/frame_decryptor_interface.h"
|
|
|
|
#include "crypto.h"
|
|
|
|
/* Logcat tag attached to every message emitted through LOGI / LOGE. */
#define TAG "RosettaE2EE"

/* printf-style logging helpers: forward to __android_log_print at INFO and
 * ERROR priority respectively, always under TAG. */
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
|
|
|
|
/* ── Diagnostics file — written from native, visible in crash reports ── */
|
|
|
|
/* Path buffer for the diagnostics file.
 * NOTE(review): not written or read in this part of the file — presumably
 * filled in by the code that opens g_diag_fd; confirm. */
static char g_diag_path[512] = {};

/* Descriptor the diag_* helpers write to; a negative value turns them into no-ops. */
static int g_diag_fd = -1;

/* Running count of diag_event() calls made while the descriptor was open. */
static std::atomic<int> g_diag_event_count{0};

/* diag_event() stops writing once this many events have been counted. */
static constexpr int kDiagEventLimit = 4000;

/* Only the first kDiagFrameLimit frames of each encryptor/decryptor get a
 * verbose per-frame log line. */
static constexpr int kDiagFrameLimit = 400;

/* Upper bound on how many leading frame bytes are hashed for log lines. */
static constexpr size_t kFrameHashSampleBytes = 320;
|
|
|
|
static void diag_write(const char *fmt, ...) {
|
|
if (g_diag_fd < 0) return;
|
|
char buf[512];
|
|
va_list ap;
|
|
va_start(ap, fmt);
|
|
int n = vsnprintf(buf, sizeof(buf), fmt, ap);
|
|
va_end(ap);
|
|
if (n > 0) write(g_diag_fd, buf, n);
|
|
}
|
|
|
|
static void diag_event(const char *fmt, ...) {
|
|
if (g_diag_fd < 0) return;
|
|
const int idx = g_diag_event_count.fetch_add(1, std::memory_order_relaxed);
|
|
if (idx >= kDiagEventLimit) return;
|
|
char buf[640];
|
|
va_list ap;
|
|
va_start(ap, fmt);
|
|
int n = vsnprintf(buf, sizeof(buf), fmt, ap);
|
|
va_end(ap);
|
|
if (n > 0) write(g_diag_fd, buf, n);
|
|
}
|
|
|
|
/**
 * 32-bit FNV-1a hash over a byte buffer, optionally restricted to a prefix.
 *
 * @param data         bytes to hash; a null pointer yields 0.
 * @param len          number of bytes available; 0 yields 0.
 * @param sample_limit when non-zero and smaller than len, only the first
 *                     sample_limit bytes are hashed; 0 means "hash everything".
 * @return the hash, or 0 for null/empty input (note: not the FNV offset basis).
 */
static inline uint32_t fnv1a32(const uint8_t* data, size_t len, size_t sample_limit = 0) {
    const uint32_t kOffsetBasis = 2166136261u;
    const uint32_t kPrime = 16777619u;

    if (data == nullptr || len == 0) return 0;

    size_t count = len;
    if (sample_limit != 0 && sample_limit < len) count = sample_limit;

    uint32_t hash = kOffsetBasis;
    const uint8_t* const end = data + count;
    for (const uint8_t* cur = data; cur != end; ++cur) {
        hash = (hash ^ *cur) * kPrime;
    }
    return hash;
}
|
|
|
|
static inline uint32_t key_fingerprint32(const uint8_t key[32]) {
|
|
return fnv1a32(key, 32, 32);
|
|
}
|
|
|
|
/**
 * Reads nonce[4..7] as one big-endian 32-bit value (the slot where this file
 * stores a 32-bit timestamp). Other nonce bytes are ignored.
 */
static inline uint32_t nonce_ts32(const uint8_t nonce[24]) {
    uint32_t ts = 0;
    for (size_t i = 4; i < 8; ++i) {
        ts = (ts << 8) | (uint32_t)nonce[i];
    }
    return ts;
}
|
|
|
|
/**
 * Maps a cricket::MediaType to a short lowercase label for log lines.
 * Values outside the four handled enumerators map to "unknown".
 */
static const char* media_type_name(cricket::MediaType media_type) {
    if (media_type == cricket::MEDIA_TYPE_AUDIO) return "audio";
    if (media_type == cricket::MEDIA_TYPE_VIDEO) return "video";
    if (media_type == cricket::MEDIA_TYPE_DATA) return "data";
    if (media_type == cricket::MEDIA_TYPE_UNSUPPORTED) return "unsupported";
    return "unknown";
}
|
|
|
|
/* ── RTP helpers (for cases when additional_data is empty) ───── */
|
|
|
|
/* Fields extracted from an RTP packet by parse_rtp_packet(). */
struct ParsedRtpPacket {
    size_t header_size{0};   // offset of the payload: fixed header + CSRC list + extension
    uint16_t sequence{0};    // big-endian 16-bit field at byte 2
    uint32_t timestamp{0};   // big-endian 32-bit field at byte 4
    uint32_t ssrc{0};        // big-endian 32-bit field at byte 8
};
|
|
|
|
/*
 * Tracking state for fill_nonce_from_rtp_frame(). A stream must first be
 * "probed" (two consistent packets seen) before it is "locked" and its RTP
 * timestamps are trusted.
 */
struct RtpProbeState {
    bool locked{false};            // true once a stream has been accepted
    bool has_probe{false};         // true while a candidate first packet is remembered

    /* Candidate packet remembered while probing. */
    uint32_t probe_ssrc{0};
    uint16_t probe_sequence{0};
    uint32_t probe_timestamp{0};

    /* Accepted stream and the most recent packet taken from it. */
    uint32_t ssrc{0};
    uint16_t last_sequence{0};
    uint32_t last_timestamp{0};
};
|
|
|
|
/*
 * State of the locally generated timestamp used when neither additional_data
 * nor an RTP header provides one.
 */
struct GeneratedTsState {
    bool initialized{false};
    uint32_t next_timestamp{0};  // timestamp the next frame will use
    uint32_t next_step{960};     // 20 ms @ 48 kHz (default Opus packetization)
};
|
|
|
|
/*
 * Base value used by fill_nonce_from_additional_data() to turn 8-byte
 * additional_data timestamps into stream-relative ones.
 */
struct AdditionalTsState {
    bool initialized{false};
    uint64_t base_timestamp{0};  // subtracted from each incoming timestamp
};
|
|
|
|
/*
 * Optional constant added by the encryptor to 8-byte additional_data
 * timestamps before they are written into the nonce.
 */
struct SenderTsOffsetState {
    bool initialized{false};  // set once the first 8-byte additional_data was seen
    bool enabled{false};      // when false, timestamps are used unmodified
    uint64_t offset{0};       // value added while enabled
};
|
|
|
|
/* Reads two bytes at p as a big-endian (network order) 16-bit value. */
static inline uint16_t load16_be(const uint8_t* p) {
    const unsigned hi = p[0];
    const unsigned lo = p[1];
    return (uint16_t)((hi << 8) | lo);
}
|
|
|
|
/* Reads four bytes at p as a big-endian (network order) 32-bit value. */
static inline uint32_t load32_be(const uint8_t* p) {
    uint32_t value = 0;
    for (int i = 0; i < 4; ++i) {
        value = (value << 8) | (uint32_t)p[i];
    }
    return value;
}
|
|
|
|
/* Reads eight bytes at p as a big-endian (network order) 64-bit value. */
static inline uint64_t load64_be(const uint8_t* p) {
    uint64_t value = 0;
    for (int i = 0; i < 8; ++i) {
        value = (value << 8) | (uint64_t)p[i];
    }
    return value;
}
|
|
|
|
|
|
/**
 * Checks whether a buffer looks like an RTP packet and extracts its header
 * fields.
 *
 * Accepted only when: at least 12 bytes, version bits == 2, the CSRC list and
 * (if the X bit is set) the header extension fit inside the buffer, and the
 * remaining payload is between 1 and 1200 bytes.
 *
 * @param data buffer to inspect (null is rejected).
 * @param len  size of the buffer in bytes.
 * @param out  receives header size, sequence number, timestamp and SSRC on
 *             success; left untouched on failure (null is rejected).
 * @return true when the buffer passed every check.
 */
static bool parse_rtp_packet(const uint8_t* data, size_t len, ParsedRtpPacket* out) {
    const size_t kFixedHeaderBytes = 12;
    const size_t kMaxPayloadBytes = 1200;

    if (data == nullptr || out == nullptr) return false;
    if (len < kFixedHeaderBytes) return false;

    const uint8_t first = data[0];

    // Top two bits carry the RTP version, which must be 2.
    if ((first >> 6) != 2) return false;

    // Low nibble = CSRC count; each CSRC entry is 4 bytes.
    size_t payload_offset = kFixedHeaderBytes + (size_t)(first & 0x0F) * 4;
    if (payload_offset > len) return false;

    if ((first & 0x10) != 0) {
        // Extension header: 16-bit profile + 16-bit length (in 32-bit words).
        if (payload_offset + 4 > len) return false;
        const size_t ext_words = load16_be(data + payload_offset + 2);
        payload_offset += 4 + ext_words * 4;
        if (payload_offset > len) return false;
    }

    const size_t payload_bytes = len - payload_offset;
    if (payload_bytes == 0 || payload_bytes > kMaxPayloadBytes) return false;

    out->header_size = payload_offset;
    out->sequence = load16_be(data + 2);
    out->timestamp = load32_be(data + 4);
    out->ssrc = load32_be(data + 8);
    return true;
}
|
|
|
|
static bool fill_nonce_from_rtp_frame(const uint8_t* data,
|
|
size_t len,
|
|
RtpProbeState* state,
|
|
uint8_t nonce[24],
|
|
size_t* header_size) {
|
|
if (!state) return false;
|
|
|
|
ParsedRtpPacket packet;
|
|
if (!parse_rtp_packet(data, len, &packet)) return false;
|
|
|
|
if (!state->locked) {
|
|
if (!state->has_probe) {
|
|
state->has_probe = true;
|
|
state->probe_ssrc = packet.ssrc;
|
|
state->probe_sequence = packet.sequence;
|
|
state->probe_timestamp = packet.timestamp;
|
|
diag_event("RTP probe-start ssrc=%u seq=%u ts=%u hdr=%zu\n",
|
|
packet.ssrc, packet.sequence, packet.timestamp, packet.header_size);
|
|
return false;
|
|
}
|
|
|
|
const bool same_ssrc = packet.ssrc == state->probe_ssrc;
|
|
const uint16_t seq_delta = (uint16_t)(packet.sequence - state->probe_sequence);
|
|
const bool sequence_progressed = seq_delta > 0 && seq_delta <= 10;
|
|
if (!same_ssrc || !sequence_progressed) {
|
|
state->probe_ssrc = packet.ssrc;
|
|
state->probe_sequence = packet.sequence;
|
|
state->probe_timestamp = packet.timestamp;
|
|
return false;
|
|
}
|
|
|
|
state->locked = true;
|
|
state->has_probe = false;
|
|
state->ssrc = packet.ssrc;
|
|
state->last_sequence = packet.sequence;
|
|
state->last_timestamp = packet.timestamp;
|
|
diag_event("RTP probe-lock ssrc=%u seq=%u ts=%u hdr=%zu\n",
|
|
packet.ssrc, packet.sequence, packet.timestamp, packet.header_size);
|
|
} else {
|
|
if (packet.ssrc != state->ssrc) {
|
|
diag_event("RTP probe-unlock reason=ssrc-change old=%u new=%u seq=%u ts=%u\n",
|
|
state->ssrc, packet.ssrc, packet.sequence, packet.timestamp);
|
|
state->locked = false;
|
|
state->has_probe = true;
|
|
state->probe_ssrc = packet.ssrc;
|
|
state->probe_sequence = packet.sequence;
|
|
state->probe_timestamp = packet.timestamp;
|
|
return false;
|
|
}
|
|
|
|
const uint16_t seq_delta = (uint16_t)(packet.sequence - state->last_sequence);
|
|
// Accept in-order packets and small jumps (packet loss).
|
|
if (seq_delta != 0 && seq_delta <= 200) {
|
|
state->last_sequence = packet.sequence;
|
|
state->last_timestamp = packet.timestamp;
|
|
} else if (seq_delta != 0) {
|
|
// Not plausible for a continuous stream: re-probe.
|
|
diag_event("RTP probe-unlock reason=seq-jump delta=%u ssrc=%u last_seq=%u seq=%u ts=%u\n",
|
|
seq_delta, packet.ssrc, state->last_sequence, packet.sequence, packet.timestamp);
|
|
state->locked = false;
|
|
state->has_probe = true;
|
|
state->probe_ssrc = packet.ssrc;
|
|
state->probe_sequence = packet.sequence;
|
|
state->probe_timestamp = packet.timestamp;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
nonce[4] = (uint8_t)(packet.timestamp >> 24);
|
|
nonce[5] = (uint8_t)(packet.timestamp >> 16);
|
|
nonce[6] = (uint8_t)(packet.timestamp >> 8);
|
|
nonce[7] = (uint8_t)(packet.timestamp);
|
|
if (header_size) *header_size = packet.header_size;
|
|
return true;
|
|
}
|
|
|
|
static bool fill_nonce_from_additional_data(const uint8_t* data,
|
|
size_t len,
|
|
uint8_t nonce[24],
|
|
bool* used_rtp_header,
|
|
AdditionalTsState* ts_state,
|
|
bool* used_relative_ts) {
|
|
if (used_rtp_header) *used_rtp_header = false;
|
|
if (used_relative_ts) *used_relative_ts = false;
|
|
if (!data || len < 8) return false;
|
|
|
|
// Desktop-compatible path: additional_data contains encoded frame timestamp
|
|
// as 8-byte BE value. On Android sender/receiver can have different absolute
|
|
// base in some pipelines; ts_state enables optional relative fallback.
|
|
// Primary desktop-compatible mode uses absolute timestamp (ts_state == nullptr).
|
|
if (len == 8) {
|
|
uint64_t ts64 = load64_be(data);
|
|
uint64_t ts_rel = ts64;
|
|
if (ts_state != nullptr) {
|
|
if (!ts_state->initialized) {
|
|
ts_state->initialized = true;
|
|
ts_state->base_timestamp = ts64;
|
|
} else if (ts64 < ts_state->base_timestamp) {
|
|
// New stream or reset; avoid huge wrapped nonce drift.
|
|
ts_state->base_timestamp = ts64;
|
|
}
|
|
ts_rel = ts64 - ts_state->base_timestamp;
|
|
if (used_relative_ts) *used_relative_ts = true;
|
|
}
|
|
nonce[0] = (uint8_t)(ts_rel >> 56);
|
|
nonce[1] = (uint8_t)(ts_rel >> 48);
|
|
nonce[2] = (uint8_t)(ts_rel >> 40);
|
|
nonce[3] = (uint8_t)(ts_rel >> 32);
|
|
nonce[4] = (uint8_t)(ts_rel >> 24);
|
|
nonce[5] = (uint8_t)(ts_rel >> 16);
|
|
nonce[6] = (uint8_t)(ts_rel >> 8);
|
|
nonce[7] = (uint8_t)(ts_rel);
|
|
return true;
|
|
}
|
|
|
|
// Legacy native WebRTC layout: additional_data can be RTP header bytes.
|
|
if (len >= 12) {
|
|
const uint8_t version = (data[0] >> 6) & 0x03;
|
|
if (version == 2) {
|
|
uint32_t ts = load32_be(data + 4);
|
|
nonce[4] = (uint8_t)(ts >> 24);
|
|
nonce[5] = (uint8_t)(ts >> 16);
|
|
nonce[6] = (uint8_t)(ts >> 8);
|
|
nonce[7] = (uint8_t)(ts);
|
|
if (used_rtp_header) *used_rtp_header = true;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Generic fallback: first 8 bytes as BE timestamp-like payload.
|
|
memcpy(nonce, data, 8);
|
|
return true;
|
|
}
|
|
|
|
static bool is_plausible_opus_packet(const uint8_t* packet, size_t len);
|
|
|
|
static bool is_plausible_decrypted_audio_frame(const uint8_t* data, size_t len) {
|
|
if (!data || len == 0) return false;
|
|
|
|
ParsedRtpPacket packet;
|
|
if (parse_rtp_packet(data, len, &packet) && packet.header_size < len) {
|
|
return is_plausible_opus_packet(data + packet.header_size, len - packet.header_size);
|
|
}
|
|
return is_plausible_opus_packet(data, len);
|
|
}
|
|
|
|
/* Writes ts big-endian into nonce[4..7]; every other nonce byte is left as is. */
static inline void fill_nonce_from_ts32(uint32_t ts, uint8_t nonce[24]) {
    for (int i = 7; i >= 4; --i) {
        nonce[i] = (uint8_t)ts;
        ts >>= 8;
    }
}
|
|
|
|
/* Writes ts big-endian into nonce[0..7]; nonce[8..23] are left as is. */
static inline void fill_nonce_from_ts64(uint64_t ts, uint8_t nonce[24]) {
    for (int i = 7; i >= 0; --i) {
        nonce[i] = (uint8_t)ts;
        ts >>= 8;
    }
}
|
|
|
|
/**
 * Current CLOCK_MONOTONIC time expressed in 48 kHz ticks
 * (seconds * 48000 plus the nanosecond part scaled to ticks).
 *
 * @return the tick count, or 0 if clock_gettime() fails.
 */
static inline uint64_t monotonic_48k_ticks() {
    const uint64_t kTicksPerSecond = 48000ULL;
    const uint64_t kNanosPerSecond = 1000000000ULL;

    struct timespec now = {};
    if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) return 0;

    const uint64_t whole_seconds_ticks = (uint64_t)now.tv_sec * kTicksPerSecond;
    const uint64_t fraction_ticks = ((uint64_t)now.tv_nsec * kTicksPerSecond) / kNanosPerSecond;
    return whole_seconds_ticks + fraction_ticks;
}
|
|
|
|
/**
 * Samples per Opus frame at 48 kHz for a TOC "config" value (RFC 6716 TOC
 * config mapping).
 *
 * @param config TOC config; 0..11 SILK, 12..15 hybrid, anything higher is
 *               treated as CELT-only.
 */
static inline uint32_t opus_base_frame_samples(uint8_t config) {
    // Configs come in groups of four; the group number selects the codec mode.
    switch (config >> 2) {
        case 0:
        case 1:
        case 2: {
            // SILK: 10/20/40/60 ms
            static const uint32_t kSilkSamples[4] = {480, 960, 1920, 2880};
            return kSilkSamples[config & 0x03];
        }
        case 3:
            // Hybrid: 10/20 ms, selected by the lowest bit.
            if ((config & 0x01) != 0) return 960;
            return 480;
        default: {
            // CELT-only: 2.5/5/10/20 ms
            static const uint32_t kCeltSamples[4] = {120, 240, 480, 960};
            return kCeltSamples[config & 0x03];
        }
    }
}
|
|
|
|
static uint32_t infer_opus_packet_duration_samples(const uint8_t* packet, size_t len) {
|
|
if (!packet || len == 0) return 960;
|
|
|
|
const uint8_t toc = packet[0];
|
|
const uint8_t config = (uint8_t)(toc >> 3);
|
|
const uint8_t frame_code = (uint8_t)(toc & 0x03);
|
|
|
|
uint32_t frame_count = 1;
|
|
if (frame_code == 1 || frame_code == 2) {
|
|
frame_count = 2;
|
|
} else if (frame_code == 3) {
|
|
if (len < 2) return 960;
|
|
frame_count = (uint32_t)(packet[1] & 0x3F);
|
|
if (frame_count == 0 || frame_count > 48) return 960;
|
|
}
|
|
|
|
uint32_t base = opus_base_frame_samples(config);
|
|
uint32_t total = base * frame_count;
|
|
if (total < 120 || total > 5760) return 960;
|
|
return total;
|
|
}
|
|
|
|
/**
 * Decodes one Opus frame-length field (1 or 2 bytes) at packet[*offset].
 *
 * A first byte below 252 is the length itself; otherwise a second byte
 * follows and the length is second * 4 + first.
 *
 * @param packet    packet bytes.
 * @param len       packet size.
 * @param offset    in/out read position; advanced past every byte consumed,
 *                  including the first byte of a two-byte field whose second
 *                  byte is missing.
 * @param frame_len receives the decoded length; untouched on failure.
 * @return false on a null argument or when the field runs past the packet.
 */
static bool decode_opus_frame_len(const uint8_t* packet,
                                  size_t len,
                                  size_t* offset,
                                  size_t* frame_len) {
    if (!packet || !offset || !frame_len) return false;

    size_t pos = *offset;
    if (pos >= len) return false;

    const size_t first = packet[pos++];
    *offset = pos;

    if (first >= 252) {
        // Two-byte form: a second byte is required.
        if (pos >= len) return false;
        const size_t second = packet[pos++];
        *offset = pos;
        *frame_len = second * 4u + first;
        return true;
    }

    *frame_len = first;
    return true;
}
|
|
|
|
static bool is_plausible_opus_packet(const uint8_t* packet, size_t len) {
|
|
if (!packet || len == 0 || len > 2000) return false;
|
|
|
|
const uint8_t toc = packet[0];
|
|
const uint8_t config = (uint8_t)(toc >> 3);
|
|
if (config > 31) return false;
|
|
|
|
const uint8_t frame_code = (uint8_t)(toc & 0x03);
|
|
const size_t payload_len = len - 1;
|
|
|
|
if (frame_code == 0) {
|
|
// 1 frame, full payload
|
|
return payload_len >= 1;
|
|
}
|
|
|
|
if (frame_code == 1) {
|
|
// 2 CBR frames, equal sizes.
|
|
if (payload_len < 2) return false;
|
|
return (payload_len % 2) == 0;
|
|
}
|
|
|
|
if (frame_code == 2) {
|
|
// 2 VBR frames
|
|
size_t off = 1;
|
|
size_t len1 = 0;
|
|
if (!decode_opus_frame_len(packet, len, &off, &len1)) return false;
|
|
if (len1 == 0) return false;
|
|
if (off + len1 >= len) return false; // need non-empty second frame
|
|
const size_t len2 = len - off - len1;
|
|
if (len2 == 0) return false;
|
|
return true;
|
|
}
|
|
|
|
// frame_code == 3: arbitrary number of frames
|
|
if (len < 2) return false;
|
|
const uint8_t ch = packet[1];
|
|
const bool vbr = (ch & 0x80) != 0;
|
|
const bool has_padding = (ch & 0x40) != 0;
|
|
const uint8_t frame_count = (uint8_t)(ch & 0x3F);
|
|
if (frame_count == 0 || frame_count > 48) return false;
|
|
|
|
const uint32_t total = opus_base_frame_samples(config) * (uint32_t)frame_count;
|
|
if (total > 5760) return false;
|
|
|
|
// Padding bit is rarely used in live voice. Rejecting it improves
|
|
// discrimination between valid Opus and random decrypted noise.
|
|
if (has_padding) return false;
|
|
|
|
if (!vbr) {
|
|
const size_t data_len = len - 2;
|
|
if (data_len < (size_t)frame_count) return false;
|
|
return (data_len % frame_count) == 0;
|
|
}
|
|
|
|
// VBR: parse lengths for first N-1 frames; last frame consumes rest.
|
|
size_t off = 2;
|
|
size_t consumed = 0;
|
|
for (size_t i = 0; i + 1 < frame_count; ++i) {
|
|
size_t flen = 0;
|
|
if (!decode_opus_frame_len(packet, len, &off, &flen)) return false;
|
|
if (flen == 0) return false;
|
|
consumed += flen;
|
|
if (off + consumed >= len) return false;
|
|
}
|
|
const size_t remaining = len - off;
|
|
if (remaining <= consumed) return false;
|
|
const size_t last = remaining - consumed;
|
|
return last > 0;
|
|
}
|
|
|
|
/* ── Native crash handler — writes to file before dying ──────── */
|
|
|
|
/* File the crash handler writes its marker to; an empty string disables the file. */
static char g_crash_path[512] = {};

/* Dispositions the crash handler restores before re-raising the signal.
 * NOTE(review): presumably captured when the handler is installed — the
 * installation code is not in this part of the file; confirm. */
static struct sigaction g_old_sigsegv = {};
static struct sigaction g_old_sigabrt = {};
|
|
|
|
static void native_crash_handler(int sig, siginfo_t *info, void *ctx) {
|
|
if (g_crash_path[0] != 0) {
|
|
int fd = open(g_crash_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
|
if (fd >= 0) {
|
|
const char *msg;
|
|
if (sig == SIGSEGV)
|
|
msg = "NATIVE CRASH: SIGSEGV (segmentation fault) in rosetta_e2ee\n"
|
|
"Fault address: see logcat for details.\n";
|
|
else if (sig == SIGABRT)
|
|
msg = "NATIVE CRASH: SIGABRT (abort) in rosetta_e2ee\n";
|
|
else
|
|
msg = "NATIVE CRASH: unknown signal in rosetta_e2ee\n";
|
|
write(fd, msg, strlen(msg));
|
|
close(fd);
|
|
}
|
|
}
|
|
LOGE("NATIVE CRASH sig=%d addr=%p", sig, info ? info->si_addr : nullptr);
|
|
|
|
struct sigaction *old = (sig == SIGSEGV) ? &g_old_sigsegv : &g_old_sigabrt;
|
|
sigaction(sig, old, nullptr);
|
|
raise(sig);
|
|
}
|
|
|
|
/* ════════════════════════════════════════════════════════════════
|
|
* XChaCha20 Encryptor — inherits from the REAL interface
|
|
* ════════════════════════════════════════════════════════════════ */
|
|
|
|
class XChaCha20Encryptor final : public webrtc::FrameEncryptorInterface {
|
|
public:
|
|
explicit XChaCha20Encryptor(const uint8_t key[32]) {
|
|
memcpy(key_, key, 32);
|
|
key_fingerprint_ = key_fingerprint32(key_);
|
|
LOGI("ENC init ptr=%p key_fp=%08x", this, key_fingerprint_);
|
|
diag_event("ENC init ptr=%p key_fp=%08x\n", this, key_fingerprint_);
|
|
}
|
|
|
|
uint32_t KeyFingerprint() const { return key_fingerprint_; }
|
|
int FrameCount() const { return diag_count_.load(std::memory_order_relaxed); }
|
|
|
|
/* ── RefCountInterface ─────────────────────────────────────── */
|
|
void AddRef() const override {
|
|
ref_.fetch_add(1, std::memory_order_relaxed);
|
|
}
|
|
|
|
rtc::RefCountReleaseStatus Release() const override {
|
|
if (ref_.fetch_sub(1, std::memory_order_acq_rel) == 1) {
|
|
delete this;
|
|
return rtc::RefCountReleaseStatus::kDroppedLastRef;
|
|
}
|
|
return rtc::RefCountReleaseStatus::kOtherRefsRemained;
|
|
}
|
|
|
|
/**
|
|
* Desktop-compatible frame format: ciphertext only (no custom prefix).
|
|
*
|
|
* Nonce (24 bytes) is derived exactly like desktop:
|
|
* - nonce[0..3] = 0
|
|
* - nonce[4..7] = RTP timestamp
|
|
* - nonce[8..23] = 0
|
|
*
|
|
* Primary source of timestamp: additional_data[4..7] (if provided by WebRTC).
|
|
* Fallback (Android path where additional_data can be empty):
|
|
* parse RTP header from frame and take timestamp from frame[4..7].
|
|
*
|
|
* If RTP header is found inside frame, we leave header bytes unencrypted
|
|
* and encrypt only payload (desktop-compatible).
|
|
*/
|
|
int Encrypt(cricket::MediaType media_type,
|
|
uint32_t ssrc,
|
|
rtc::ArrayView<const uint8_t> additional_data,
|
|
rtc::ArrayView<const uint8_t> frame,
|
|
rtc::ArrayView<uint8_t> encrypted_frame,
|
|
size_t* bytes_written) override {
|
|
if (frame.size() == 0 || encrypted_frame.size() < frame.size()) {
|
|
*bytes_written = 0;
|
|
return -1;
|
|
}
|
|
|
|
size_t header_size = 0;
|
|
bool nonce_from_rtp_header = false;
|
|
bool nonce_from_generated_ts = false;
|
|
bool nonce_from_additional_data = false;
|
|
bool additional_was_rtp_header = false;
|
|
bool additional_used_mono_offset = false;
|
|
uint32_t generated_ts_used = 0;
|
|
|
|
// Build nonce from RTP timestamp in additional_data (preferred).
|
|
uint8_t nonce[24] = {0};
|
|
bool additional_used_relative_ts = false;
|
|
nonce_from_additional_data = fill_nonce_from_additional_data(
|
|
additional_data.data(),
|
|
additional_data.size(),
|
|
nonce,
|
|
&additional_was_rtp_header,
|
|
nullptr,
|
|
&additional_used_relative_ts);
|
|
if (!nonce_from_additional_data) {
|
|
nonce_from_rtp_header =
|
|
fill_nonce_from_rtp_frame(frame.data(), frame.size(), &rtp_probe_, nonce, &header_size);
|
|
if (!nonce_from_rtp_header) {
|
|
if (!generated_ts_.initialized) {
|
|
generated_ts_.initialized = true;
|
|
generated_ts_.next_timestamp = 0;
|
|
generated_ts_.next_step = 960;
|
|
}
|
|
nonce_from_generated_ts = true;
|
|
generated_ts_used = generated_ts_.next_timestamp;
|
|
fill_nonce_from_ts32(generated_ts_used, nonce);
|
|
diag_event("ENC fallback=generated-ts mt=%s ssrc=%u frame_sz=%zu ad_sz=%zu gen_ts=%u\n",
|
|
media_type_name(media_type), ssrc, frame.size(), additional_data.size(), generated_ts_used);
|
|
}
|
|
}
|
|
|
|
// Some Android sender pipelines expose stream-relative ad8 timestamps
|
|
// (0, 960, 1920, ...), while desktop receiver expects an absolute base.
|
|
// For interop, add a monotonic 48k offset once when first ad8 is tiny.
|
|
if (nonce_from_additional_data &&
|
|
additional_data.size() == 8 &&
|
|
!additional_was_rtp_header &&
|
|
additional_data.data() != nullptr) {
|
|
const uint64_t ad_ts64 = load64_be(additional_data.data());
|
|
if (!sender_ts_offset_.initialized) {
|
|
sender_ts_offset_.initialized = true;
|
|
// Keep pure raw-abs mode by default; desktop is the source of truth.
|
|
sender_ts_offset_.enabled = false;
|
|
sender_ts_offset_.offset = 0ULL;
|
|
diag_event("ENC ad8-base init ssrc=%u ad_ts=%llu use_mono=%d mono_off=%llu\n",
|
|
ssrc,
|
|
(unsigned long long)ad_ts64,
|
|
sender_ts_offset_.enabled ? 1 : 0,
|
|
(unsigned long long)sender_ts_offset_.offset);
|
|
}
|
|
if (sender_ts_offset_.enabled) {
|
|
const uint64_t ts_adj = ad_ts64 + sender_ts_offset_.offset;
|
|
fill_nonce_from_ts64(ts_adj, nonce);
|
|
additional_used_mono_offset = true;
|
|
}
|
|
}
|
|
|
|
// Desktop createEncodedStreams encrypts full encoded chunk.
|
|
// To stay wire-compatible, do not preserve any leading RTP-like bytes.
|
|
rosetta_xchacha20_xor(encrypted_frame.data(),
|
|
frame.data(), frame.size(), nonce, key_);
|
|
*bytes_written = frame.size();
|
|
|
|
if (nonce_from_generated_ts) {
|
|
const uint32_t step = infer_opus_packet_duration_samples(frame.data(), frame.size());
|
|
generated_ts_.next_step = step;
|
|
generated_ts_.next_timestamp = generated_ts_used + step;
|
|
}
|
|
|
|
// Diag: log first frames with enough context for crash analysis.
|
|
int n = diag_count_.fetch_add(1, std::memory_order_relaxed);
|
|
if (n < kDiagFrameLimit) {
|
|
uint8_t ad_prefix[8] = {0};
|
|
const size_t ad_copy = additional_data.size() < sizeof(ad_prefix)
|
|
? additional_data.size()
|
|
: sizeof(ad_prefix);
|
|
if (ad_copy > 0) memcpy(ad_prefix, additional_data.data(), ad_copy);
|
|
ParsedRtpPacket rtp{};
|
|
const bool has_rtp = parse_rtp_packet(frame.data(), frame.size(), &rtp);
|
|
const bool opus_plausible =
|
|
has_rtp && rtp.header_size < frame.size()
|
|
? is_plausible_opus_packet(frame.data() + rtp.header_size, frame.size() - rtp.header_size)
|
|
: is_plausible_opus_packet(frame.data(), frame.size());
|
|
const uint32_t in_hash = fnv1a32(frame.data(), frame.size(), kFrameHashSampleBytes);
|
|
const uint32_t out_hash = fnv1a32(encrypted_frame.data(), frame.size(), kFrameHashSampleBytes);
|
|
const char* mode =
|
|
nonce_from_rtp_header
|
|
? "rtp"
|
|
: (nonce_from_generated_ts
|
|
? "gen"
|
|
: (nonce_from_additional_data
|
|
? (additional_was_rtp_header
|
|
? "ad-rtp"
|
|
: (additional_used_mono_offset
|
|
? "raw-abs+mono"
|
|
: (additional_used_relative_ts ? "raw-rel" : "raw-abs")))
|
|
: "raw-abs"));
|
|
LOGI("ENC frame#%d mt=%s ssrc=%u sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u rtp_ok=%d rtp_seq=%u rtp_ts=%u rtp_ssrc=%u opus_ok=%d key_fp=%08x in_h=%08x out_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x",
|
|
n, media_type_name(media_type), ssrc, frame.size(), additional_data.size(), header_size, mode,
|
|
nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
|
|
has_rtp ? 1 : 0, has_rtp ? rtp.sequence : 0, has_rtp ? rtp.timestamp : 0, has_rtp ? rtp.ssrc : 0,
|
|
opus_plausible ? 1 : 0, key_fingerprint_, in_hash, out_hash,
|
|
ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
|
|
ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
|
|
diag_write("ENC frame#%d mt=%s ssrc=%u sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u rtp_ok=%d rtp_seq=%u rtp_ts=%u rtp_ssrc=%u opus_ok=%d key_fp=%08x in_h=%08x out_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x\n",
|
|
n, media_type_name(media_type), ssrc, frame.size(), additional_data.size(), header_size, mode,
|
|
nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
|
|
has_rtp ? 1 : 0, has_rtp ? rtp.sequence : 0, has_rtp ? rtp.timestamp : 0, has_rtp ? rtp.ssrc : 0,
|
|
opus_plausible ? 1 : 0, key_fingerprint_, in_hash, out_hash,
|
|
ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
|
|
ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
size_t GetMaxCiphertextByteSize(cricket::MediaType, size_t frame_size) override {
|
|
return frame_size;
|
|
}
|
|
|
|
protected:
|
|
~XChaCha20Encryptor() override {
|
|
diag_event("ENC destroy ptr=%p key_fp=%08x\n", this, key_fingerprint_);
|
|
memset(key_, 0, 32);
|
|
}
|
|
|
|
private:
|
|
mutable std::atomic<int> ref_{0};
|
|
mutable std::atomic<int> diag_count_{0};
|
|
mutable RtpProbeState rtp_probe_;
|
|
mutable GeneratedTsState generated_ts_;
|
|
mutable SenderTsOffsetState sender_ts_offset_;
|
|
uint32_t key_fingerprint_ = 0;
|
|
uint8_t key_[32];
|
|
};
|
|
|
|
/* ════════════════════════════════════════════════════════════════
|
|
* XChaCha20 Decryptor — inherits from the REAL interface
|
|
* ════════════════════════════════════════════════════════════════ */
|
|
|
|
class XChaCha20Decryptor final : public webrtc::FrameDecryptorInterface {
|
|
public:
|
|
explicit XChaCha20Decryptor(const uint8_t key[32]) {
|
|
memcpy(key_, key, 32);
|
|
key_fingerprint_ = key_fingerprint32(key_);
|
|
LOGI("DEC init ptr=%p key_fp=%08x", this, key_fingerprint_);
|
|
diag_event("DEC init ptr=%p key_fp=%08x\n", this, key_fingerprint_);
|
|
}
|
|
|
|
uint32_t KeyFingerprint() const { return key_fingerprint_; }
|
|
int FrameCount() const { return diag_count_.load(std::memory_order_relaxed); }
|
|
uint32_t BadStreak() const { return bad_audio_streak_.load(std::memory_order_relaxed); }
|
|
|
|
/* ── RefCountInterface ─────────────────────────────────────── */
|
|
void AddRef() const override {
|
|
ref_.fetch_add(1, std::memory_order_relaxed);
|
|
}
|
|
|
|
rtc::RefCountReleaseStatus Release() const override {
|
|
if (ref_.fetch_sub(1, std::memory_order_acq_rel) == 1) {
|
|
delete this;
|
|
return rtc::RefCountReleaseStatus::kDroppedLastRef;
|
|
}
|
|
return rtc::RefCountReleaseStatus::kOtherRefsRemained;
|
|
}
|
|
|
|
/**
|
|
* Desktop-compatible decrypt:
|
|
* - nonce from RTP timestamp
|
|
* - if RTP header is present inside encrypted_frame (fallback path),
|
|
* keep header bytes untouched and decrypt payload only.
|
|
*/
|
|
Result Decrypt(cricket::MediaType media_type,
|
|
const std::vector<uint32_t>& csrcs,
|
|
rtc::ArrayView<const uint8_t> additional_data,
|
|
rtc::ArrayView<const uint8_t> encrypted_frame,
|
|
rtc::ArrayView<uint8_t> frame) override {
|
|
uint8_t nonce[24] = {0};
|
|
size_t header_size = 0;
|
|
bool nonce_from_rtp_header = false;
|
|
bool nonce_from_generated_ts = false;
|
|
bool nonce_from_additional_data = false;
|
|
bool additional_was_rtp_header = false;
|
|
bool additional_used_relative_ts = false;
|
|
bool used_additional_relative_fallback = false;
|
|
bool used_plain_passthrough = false;
|
|
uint32_t generated_ts_used = 0;
|
|
nonce_from_additional_data = fill_nonce_from_additional_data(
|
|
additional_data.data(),
|
|
additional_data.size(),
|
|
nonce,
|
|
&additional_was_rtp_header,
|
|
nullptr,
|
|
&additional_used_relative_ts);
|
|
if (!nonce_from_additional_data) {
|
|
nonce_from_rtp_header =
|
|
fill_nonce_from_rtp_frame(encrypted_frame.data(), encrypted_frame.size(), &rtp_probe_, nonce, &header_size);
|
|
if (!nonce_from_rtp_header) {
|
|
if (!generated_ts_.initialized) {
|
|
generated_ts_.initialized = true;
|
|
generated_ts_.next_timestamp = 0;
|
|
generated_ts_.next_step = 960;
|
|
}
|
|
nonce_from_generated_ts = true;
|
|
generated_ts_used = generated_ts_.next_timestamp;
|
|
fill_nonce_from_ts32(generated_ts_used, nonce);
|
|
diag_event("DEC fallback=generated-ts mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu gen_ts=%u\n",
|
|
media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), generated_ts_used);
|
|
}
|
|
}
|
|
|
|
if (encrypted_frame.size() == 0 || frame.size() < encrypted_frame.size()) {
|
|
return {Result::Status::kFailedToDecrypt, 0};
|
|
}
|
|
|
|
bool used_generated_resync = false;
|
|
|
|
// Desktop createEncodedStreams decrypts full encoded chunk.
|
|
rosetta_xchacha20_xor(frame.data(), encrypted_frame.data(), encrypted_frame.size(), nonce, key_);
|
|
|
|
if (nonce_from_additional_data) {
|
|
bool plausible = is_plausible_decrypted_audio_frame(frame.data(), encrypted_frame.size());
|
|
|
|
// Fallback for Android pipelines where additional_data timestamps are
|
|
// stream-relative while remote side uses a different absolute base.
|
|
if (!plausible && additional_data.size() == 8 && !additional_was_rtp_header) {
|
|
uint8_t nonce_rel[24] = {0};
|
|
bool rel_used = false;
|
|
if (fill_nonce_from_additional_data(
|
|
additional_data.data(),
|
|
additional_data.size(),
|
|
nonce_rel,
|
|
nullptr,
|
|
&additional_rel_ts_state_,
|
|
&rel_used) &&
|
|
rel_used) {
|
|
std::vector<uint8_t> candidate(encrypted_frame.size());
|
|
rosetta_xchacha20_xor(
|
|
candidate.data(),
|
|
encrypted_frame.data(),
|
|
encrypted_frame.size(),
|
|
nonce_rel,
|
|
key_);
|
|
if (is_plausible_decrypted_audio_frame(candidate.data(), candidate.size())) {
|
|
memcpy(frame.data(), candidate.data(), candidate.size());
|
|
memcpy(nonce, nonce_rel, sizeof(nonce));
|
|
plausible = true;
|
|
used_additional_relative_fallback = true;
|
|
additional_used_relative_ts = true;
|
|
diag_event("DEC fallback=relative-ad-ts mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu nonce_ts=%u\n",
|
|
media_type_name(media_type), csrcs.size(), encrypted_frame.size(),
|
|
additional_data.size(), nonce_ts32(nonce));
|
|
}
|
|
}
|
|
}
|
|
|
|
// If payload already looks like valid Opus, keep plaintext.
|
|
// This protects interop when peer stream is unexpectedly unencrypted.
|
|
if (!plausible &&
|
|
is_plausible_decrypted_audio_frame(encrypted_frame.data(), encrypted_frame.size())) {
|
|
memcpy(frame.data(), encrypted_frame.data(), encrypted_frame.size());
|
|
used_plain_passthrough = true;
|
|
diag_event("DEC fallback=plain-passthrough mt=%s csrcs=%zu enc_sz=%zu ad_sz=%zu\n",
|
|
media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size());
|
|
}
|
|
}
|
|
|
|
if (nonce_from_generated_ts) {
|
|
bool plausible = is_plausible_opus_packet(frame.data(), encrypted_frame.size());
|
|
|
|
// Recover after lost packets by probing a few forward timestamp steps.
|
|
if (!plausible) {
|
|
std::vector<uint8_t> candidate(encrypted_frame.size());
|
|
for (uint32_t i = 1; i <= 8; ++i) {
|
|
const uint32_t ts_try = generated_ts_used + generated_ts_.next_step * i;
|
|
uint8_t nonce_try[24] = {0};
|
|
fill_nonce_from_ts32(ts_try, nonce_try);
|
|
rosetta_xchacha20_xor(
|
|
candidate.data(),
|
|
encrypted_frame.data(),
|
|
encrypted_frame.size(),
|
|
nonce_try,
|
|
key_);
|
|
if (is_plausible_opus_packet(candidate.data(), candidate.size())) {
|
|
memcpy(frame.data(), candidate.data(), candidate.size());
|
|
generated_ts_used = ts_try;
|
|
used_generated_resync = true;
|
|
plausible = true;
|
|
diag_event("DEC fallback=generated-resync mt=%s csrcs=%zu enc_sz=%zu ts_try=%u step=%u probe=%u\n",
|
|
media_type_name(media_type), csrcs.size(), encrypted_frame.size(),
|
|
ts_try, generated_ts_.next_step, i);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
const uint32_t step = infer_opus_packet_duration_samples(frame.data(), encrypted_frame.size());
|
|
generated_ts_.next_step = step;
|
|
generated_ts_.next_timestamp = generated_ts_used + step;
|
|
}
|
|
|
|
// Diag: log first frames with enough context for crash analysis.
|
|
int n = diag_count_.fetch_add(1, std::memory_order_relaxed);
|
|
if (n < kDiagFrameLimit) {
|
|
uint8_t ad_prefix[8] = {0};
|
|
const size_t ad_copy = additional_data.size() < sizeof(ad_prefix)
|
|
? additional_data.size()
|
|
: sizeof(ad_prefix);
|
|
if (ad_copy > 0) memcpy(ad_prefix, additional_data.data(), ad_copy);
|
|
ParsedRtpPacket enc_rtp{};
|
|
ParsedRtpPacket dec_rtp{};
|
|
const bool has_enc_rtp = parse_rtp_packet(encrypted_frame.data(), encrypted_frame.size(), &enc_rtp);
|
|
const bool has_dec_rtp = parse_rtp_packet(frame.data(), encrypted_frame.size(), &dec_rtp);
|
|
const bool dec_plausible = is_plausible_decrypted_audio_frame(frame.data(), encrypted_frame.size());
|
|
const uint32_t enc_hash = fnv1a32(encrypted_frame.data(), encrypted_frame.size(), kFrameHashSampleBytes);
|
|
const uint32_t dec_hash = fnv1a32(frame.data(), encrypted_frame.size(), kFrameHashSampleBytes);
|
|
const char* mode = nullptr;
|
|
if (nonce_from_rtp_header) {
|
|
mode = "rtp";
|
|
} else if (nonce_from_generated_ts) {
|
|
mode = used_generated_resync ? "gen-resync" : "gen";
|
|
} else if (nonce_from_additional_data) {
|
|
if (used_plain_passthrough) mode = "raw-plain";
|
|
else if (additional_was_rtp_header) mode = "ad-rtp";
|
|
else if (used_additional_relative_fallback) mode = "raw-rel-fb";
|
|
else mode = additional_used_relative_ts ? "raw-rel" : "raw-abs";
|
|
} else {
|
|
mode = "raw-abs";
|
|
}
|
|
uint32_t bad_streak = 0;
|
|
if (!dec_plausible) {
|
|
bad_streak = bad_audio_streak_.fetch_add(1, std::memory_order_relaxed) + 1;
|
|
if (bad_streak == 1 || bad_streak == 3 || bad_streak == 10 || (bad_streak % 50) == 0) {
|
|
diag_event("DEC degraded mt=%s csrcs=%zu mode=%s bad_streak=%u nonce_ts=%u key_fp=%08x\n",
|
|
media_type_name(media_type), csrcs.size(), mode, bad_streak,
|
|
nonce_ts32(nonce), key_fingerprint_);
|
|
}
|
|
} else {
|
|
const uint32_t prev_bad = bad_audio_streak_.exchange(0, std::memory_order_relaxed);
|
|
if (prev_bad >= 3) {
|
|
diag_event("DEC recovered mt=%s csrcs=%zu mode=%s prev_bad_streak=%u nonce_ts=%u key_fp=%08x\n",
|
|
media_type_name(media_type), csrcs.size(), mode, prev_bad,
|
|
nonce_ts32(nonce), key_fingerprint_);
|
|
}
|
|
}
|
|
LOGI("DEC frame#%d mt=%s csrcs=%zu enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u dec_ok=%d bad_streak=%u enc_rtp=%d enc_seq=%u enc_ts=%u enc_ssrc=%u dec_rtp=%d dec_seq=%u dec_ts=%u dec_ssrc=%u key_fp=%08x enc_h=%08x dec_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x",
|
|
n, media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), header_size, mode,
|
|
nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
|
|
dec_plausible ? 1 : 0, bad_streak,
|
|
has_enc_rtp ? 1 : 0, has_enc_rtp ? enc_rtp.sequence : 0, has_enc_rtp ? enc_rtp.timestamp : 0, has_enc_rtp ? enc_rtp.ssrc : 0,
|
|
has_dec_rtp ? 1 : 0, has_dec_rtp ? dec_rtp.sequence : 0, has_dec_rtp ? dec_rtp.timestamp : 0, has_dec_rtp ? dec_rtp.ssrc : 0,
|
|
key_fingerprint_, enc_hash, dec_hash,
|
|
ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
|
|
ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
|
|
diag_write("DEC frame#%d mt=%s csrcs=%zu enc_sz=%zu ad=%zu hdr=%zu mode=%s nonce_ts=%u gen_ts=%u next_step=%u dec_ok=%d bad_streak=%u enc_rtp=%d enc_seq=%u enc_ts=%u enc_ssrc=%u dec_rtp=%d dec_seq=%u dec_ts=%u dec_ssrc=%u key_fp=%08x enc_h=%08x dec_h=%08x ad8=%02x%02x%02x%02x%02x%02x%02x%02x\n",
|
|
n, media_type_name(media_type), csrcs.size(), encrypted_frame.size(), additional_data.size(), header_size, mode,
|
|
nonce_ts32(nonce), generated_ts_used, generated_ts_.next_step,
|
|
dec_plausible ? 1 : 0, bad_streak,
|
|
has_enc_rtp ? 1 : 0, has_enc_rtp ? enc_rtp.sequence : 0, has_enc_rtp ? enc_rtp.timestamp : 0, has_enc_rtp ? enc_rtp.ssrc : 0,
|
|
has_dec_rtp ? 1 : 0, has_dec_rtp ? dec_rtp.sequence : 0, has_dec_rtp ? dec_rtp.timestamp : 0, has_dec_rtp ? dec_rtp.ssrc : 0,
|
|
key_fingerprint_, enc_hash, dec_hash,
|
|
ad_prefix[0], ad_prefix[1], ad_prefix[2], ad_prefix[3],
|
|
ad_prefix[4], ad_prefix[5], ad_prefix[6], ad_prefix[7]);
|
|
}
|
|
|
|
return {Result::Status::kOk, encrypted_frame.size()};
|
|
}
|
|
|
|
/**
 * Reports how large a plaintext buffer the caller must provide.
 *
 * The decrypt path in this class reports encrypted_frame.size() bytes
 * written, so the plaintext bound equals the ciphertext size. The media
 * type is ignored.
 */
size_t GetMaxPlaintextByteSize(cricket::MediaType /*media_type*/,
                               size_t encrypted_frame_size) override {
    const size_t max_plaintext_bytes = encrypted_frame_size;
    return max_plaintext_bytes;
}
|
|
|
|
protected:
|
|
/**
 * Logs the teardown (pointer, key fingerprint, current bad-frame streak)
 * and wipes the key material before the object's storage is released.
 */
~XChaCha20Decryptor() override {
    diag_event("DEC destroy ptr=%p key_fp=%08x bad_streak=%u\n",
               this, key_fingerprint_, bad_audio_streak_.load(std::memory_order_relaxed));

    // A plain memset() on a member of a dying object is a dead store that the
    // optimizer is allowed to drop; writing through a volatile pointer keeps
    // the wipe in the generated code.
    volatile uint8_t *wipe = key_;
    for (size_t i = 0; i < sizeof(key_); ++i) {
        wipe[i] = 0;
    }
}
|
|
|
|
private:
|
|
// NOTE(review): presumably the intrusive reference count driven by
// AddRef()/Release(); those methods are outside this view — confirm.
mutable std::atomic<int> ref_{0};
|
|
// Incremented once per frame at the end of the decrypt path; the verbose
// per-frame log is emitted only while the previous value is below
// kDiagFrameLimit.
mutable std::atomic<int> diag_count_{0};
|
|
// Number of consecutive frames whose decrypted payload failed the
// plausibility check; reset to 0 by the next plausible frame.
// NOTE(review): in the visible decrypt path this counter is only updated
// inside the `n < kDiagFrameLimit` branch, so it appears to stop tracking
// once the per-frame log limit is reached — confirm this is intended.
mutable std::atomic<uint32_t> bad_audio_streak_{0};
|
|
// State type declared elsewhere; its use is outside the visible range.
mutable RtpProbeState rtp_probe_;
|
|
// Carries next_step / next_timestamp, both rewritten after every frame
// decrypted with a generated-timestamp nonce.
mutable GeneratedTsState generated_ts_;
|
|
// State type declared elsewhere; its use is outside the visible range.
mutable AdditionalTsState additional_rel_ts_state_;
|
|
// 32-bit fingerprint of the key, printed as key_fp in diagnostics.
uint32_t key_fingerprint_ = 0;
|
|
// 32-byte key handed to rosetta_xchacha20_xor; zeroed in the destructor.
uint8_t key_[32];
|
|
};
|
|
|
|
/* ════════════════════════════════════════════════════════════════
|
|
* JNI exports
|
|
* ════════════════════════════════════════════════════════════════ */
|
|
|
|
extern "C" {
|
|
|
|
/* ── HSalsa20 for nacl.box.before() ──────────────────────────── */
|
|
|
|
/**
 * Runs rosetta_hsalsa20 over a raw DH shared secret with an all-zero
 * 16-byte input block and returns the 32-byte result.
 *
 * @param jRawDh byte array of at least 32 bytes.
 * @return a new 32-byte Java array, or nullptr when the input is null or
 *         shorter than 32 bytes, when the array elements cannot be
 *         accessed, or when the result array cannot be allocated (in the
 *         last two cases a Java exception is left pending by the JNI call).
 */
JNIEXPORT jbyteArray JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeHSalsa20(
    JNIEnv *env, jclass, jbyteArray jRawDh)
{
    if (jRawDh == nullptr) return nullptr;

    const jsize len = env->GetArrayLength(jRawDh);
    if (len < 32) return nullptr;

    auto *raw = reinterpret_cast<uint8_t *>(env->GetByteArrayElements(jRawDh, nullptr));
    if (raw == nullptr) return nullptr;  // JNI could not pin/copy the array

    uint8_t out[32];
    uint8_t zeros[16] = {0};

    rosetta_hsalsa20(out, zeros, raw);

    // Input was only read, so discard any copy without writing it back.
    env->ReleaseByteArrayElements(jRawDh, reinterpret_cast<jbyte *>(raw), JNI_ABORT);

    jbyteArray result = env->NewByteArray(32);
    if (result != nullptr) {
        env->SetByteArrayRegion(result, 0, 32, reinterpret_cast<const jbyte *>(out));
    }

    // Wipe the derived key from the stack. A volatile write is used because a
    // plain memset() on a local that is about to go out of scope may be
    // removed as a dead store.
    volatile uint8_t *wipe = out;
    for (size_t i = 0; i < sizeof(out); ++i) {
        wipe[i] = 0;
    }
    return result;
}
|
|
|
|
/* ── Create / destroy encryptor ──────────────────────────────── */
|
|
|
|
/**
 * Creates an XChaCha20Encryptor from a Java key array and returns it as an
 * opaque handle.
 *
 * @param jKey byte array of at least 32 bytes.
 * @return the encryptor pointer as a jlong (one reference already taken via
 *         AddRef()), or 0 when the key is null, shorter than 32 bytes, or
 *         its bytes cannot be accessed.
 */
JNIEXPORT jlong JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateEncryptor(
    JNIEnv *env, jclass, jbyteArray jKey)
{
    // A null array is reported through the same "too short" path as len=0
    // instead of being handed to GetArrayLength().
    const jsize len = (jKey != nullptr) ? env->GetArrayLength(jKey) : 0;
    if (len < 32) {
        LOGE("Create encryptor failed: key length=%d (<32)", (int)len);
        diag_event("ENC create-failed key_len=%d\n", (int)len);
        return 0;
    }

    auto *key = reinterpret_cast<uint8_t *>(env->GetByteArrayElements(jKey, nullptr));
    if (key == nullptr) {
        // JNI could not pin/copy the array; a Java exception is pending.
        LOGE("Create encryptor failed: key bytes unavailable");
        diag_event("ENC create-failed key_bytes=null\n");
        return 0;
    }

    const uint32_t key_fp = key_fingerprint32(key);
    auto *enc = new XChaCha20Encryptor(key);
    // Key was only read, so discard any copy without writing it back.
    env->ReleaseByteArrayElements(jKey, reinterpret_cast<jbyte *>(key), JNI_ABORT);

    // AddRef so the pointer we hand out has ref=1.
    // WebRTC's scoped_refptr will AddRef again when it takes ownership.
    enc->AddRef();

    LOGI("Created XChaCha20 encryptor %p key_fp=%08x", enc, key_fp);
    diag_event("ENC created ptr=%p key_fp=%08x key_len=%d\n", enc, key_fp, (int)len);
    return reinterpret_cast<jlong>(enc);
}
|
|
|
|
/**
 * Drops the reference held by the Java side on an encryptor handle.
 * A zero handle is ignored.
 */
JNIEXPORT void JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeReleaseEncryptor(
    JNIEnv *, jclass, jlong ptr)
{
    if (ptr != 0) {
        XChaCha20Encryptor *const encryptor = reinterpret_cast<XChaCha20Encryptor *>(ptr);

        // Log before Release(): the object must not be touched afterwards.
        LOGI("Release XChaCha20 encryptor %p key_fp=%08x", encryptor, encryptor->KeyFingerprint());
        diag_event("ENC release ptr=%p key_fp=%08x\n", encryptor, encryptor->KeyFingerprint());

        encryptor->Release();
    }
}
|
|
|
|
/* ── Create / destroy decryptor ──────────────────────────────── */
|
|
|
|
/**
 * Creates an XChaCha20Decryptor from a Java key array and returns it as an
 * opaque handle.
 *
 * @param jKey byte array of at least 32 bytes.
 * @return the decryptor pointer as a jlong (one reference already taken via
 *         AddRef()), or 0 when the key is null, shorter than 32 bytes, or
 *         its bytes cannot be accessed.
 */
JNIEXPORT jlong JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCreateDecryptor(
    JNIEnv *env, jclass, jbyteArray jKey)
{
    // A null array is reported through the same "too short" path as len=0
    // instead of being handed to GetArrayLength().
    const jsize len = (jKey != nullptr) ? env->GetArrayLength(jKey) : 0;
    if (len < 32) {
        LOGE("Create decryptor failed: key length=%d (<32)", (int)len);
        diag_event("DEC create-failed key_len=%d\n", (int)len);
        return 0;
    }

    auto *key = reinterpret_cast<uint8_t *>(env->GetByteArrayElements(jKey, nullptr));
    if (key == nullptr) {
        // JNI could not pin/copy the array; a Java exception is pending.
        LOGE("Create decryptor failed: key bytes unavailable");
        diag_event("DEC create-failed key_bytes=null\n");
        return 0;
    }

    const uint32_t key_fp = key_fingerprint32(key);
    auto *dec = new XChaCha20Decryptor(key);
    // Key was only read, so discard any copy without writing it back.
    env->ReleaseByteArrayElements(jKey, reinterpret_cast<jbyte *>(key), JNI_ABORT);

    // The handle returned to Java carries its own reference.
    dec->AddRef();

    LOGI("Created XChaCha20 decryptor %p key_fp=%08x", dec, key_fp);
    diag_event("DEC created ptr=%p key_fp=%08x key_len=%d\n", dec, key_fp, (int)len);
    return reinterpret_cast<jlong>(dec);
}
|
|
|
|
/**
 * Drops the reference held by the Java side on a decryptor handle.
 * A zero handle is ignored.
 */
JNIEXPORT void JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeReleaseDecryptor(
    JNIEnv *, jclass, jlong ptr)
{
    if (ptr != 0) {
        XChaCha20Decryptor *const decryptor = reinterpret_cast<XChaCha20Decryptor *>(ptr);

        // Log before Release(): the object must not be touched afterwards.
        LOGI("Release XChaCha20 decryptor %p key_fp=%08x", decryptor, decryptor->KeyFingerprint());
        diag_event("DEC release ptr=%p key_fp=%08x\n", decryptor, decryptor->KeyFingerprint());

        decryptor->Release();
    }
}
|
|
|
|
/* ── Install native crash handler ─────────────────────────────── */
|
|
|
|
/**
 * Stores the crash-report path in g_crash_path and registers
 * native_crash_handler for SIGSEGV and SIGABRT, saving the previous
 * dispositions in g_old_sigsegv / g_old_sigabrt.
 *
 * @param jPath path for the crash report; if it is null or its UTF-8 bytes
 *              cannot be obtained, nothing is installed.
 */
JNIEXPORT void JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeInstallCrashHandler(
    JNIEnv *env, jclass, jstring jPath)
{
    // GetStringUTFChars() returns NULL on failure; never feed that (or a null
    // jstring) to a string copy.
    const char *path = (jPath != nullptr) ? env->GetStringUTFChars(jPath, nullptr) : nullptr;
    if (path == nullptr) {
        LOGE("Native crash handler not installed: path unavailable");
        return;
    }
    // snprintf always NUL-terminates and truncates an over-long path.
    snprintf(g_crash_path, sizeof(g_crash_path), "%s", path);
    env->ReleaseStringUTFChars(jPath, path);

    struct sigaction sa = {};
    sa.sa_sigaction = native_crash_handler;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);

    if (sigaction(SIGSEGV, &sa, &g_old_sigsegv) != 0) {
        LOGE("Native crash handler: sigaction(SIGSEGV) failed");
    }
    if (sigaction(SIGABRT, &sa, &g_old_sigabrt) != 0) {
        LOGE("Native crash handler: sigaction(SIGABRT) failed");
    }

    LOGI("Native crash handler installed, path=%s", g_crash_path);
}
|
|
|
|
/* ── Open diagnostics file for E2EE frame logging ────────────── */
|
|
|
|
/**
 * (Re)opens the diagnostics file: closes any file that is already open,
 * resets the event counter, then creates/truncates the file at jPath and
 * writes a header line.
 *
 * @param jPath destination path; if it is null or its UTF-8 bytes cannot be
 *              obtained, the diagnostics file stays closed.
 */
JNIEXPORT void JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeOpenDiagFile(
    JNIEnv *env, jclass, jstring jPath)
{
    if (g_diag_fd >= 0) {
        close(g_diag_fd);
        g_diag_fd = -1;
    }
    g_diag_event_count.store(0, std::memory_order_relaxed);

    // GetStringUTFChars() returns NULL on failure; never feed that (or a null
    // jstring) to a string copy or open().
    const char *path = (jPath != nullptr) ? env->GetStringUTFChars(jPath, nullptr) : nullptr;
    if (path == nullptr) {
        LOGE("Failed to open diag file: path unavailable");
        return;
    }
    // snprintf always NUL-terminates and truncates an over-long path.
    snprintf(g_diag_path, sizeof(g_diag_path), "%s", path);
    g_diag_fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    env->ReleaseStringUTFChars(jPath, path);

    if (g_diag_fd >= 0) {
        diag_write("=== E2EE DIAGNOSTICS pid=%d ===\n", (int)getpid());
        LOGI("Diag file opened: %s", g_diag_path);
        diag_event("DIAG open path=%s\n", g_diag_path);
    } else {
        LOGE("Failed to open diag file: %s", g_diag_path);
    }
}
|
|
|
|
/**
 * Writes the closing records to the diagnostics file and closes it.
 * Does nothing when no diagnostics file is open.
 */
JNIEXPORT void JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeCloseDiagFile(
    JNIEnv *, jclass)
{
    if (g_diag_fd < 0) {
        return;
    }

    // Emit the trailer while the descriptor is still valid.
    diag_event("DIAG close path=%s\n", g_diag_path);
    diag_write("=== END ===\n");

    close(g_diag_fd);
    g_diag_fd = -1;
}
|
|
|
|
/* ── Query frame counts for health checks ────────────────────── */
|
|
|
|
/**
 * Returns the encryptor's FrameCount() for the given handle,
 * or -1 when the handle is zero.
 */
JNIEXPORT jint JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeGetEncryptorFrameCount(
    JNIEnv *, jclass, jlong ptr)
{
    if (ptr != 0) {
        XChaCha20Encryptor *const encryptor = reinterpret_cast<XChaCha20Encryptor *>(ptr);
        return encryptor->FrameCount();
    }
    return -1;
}
|
|
|
|
/**
 * Returns the decryptor's FrameCount() for the given handle,
 * or -1 when the handle is zero.
 */
JNIEXPORT jint JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeGetDecryptorFrameCount(
    JNIEnv *, jclass, jlong ptr)
{
    if (ptr != 0) {
        XChaCha20Decryptor *const decryptor = reinterpret_cast<XChaCha20Decryptor *>(ptr);
        return decryptor->FrameCount();
    }
    return -1;
}
|
|
|
|
/**
 * Returns the decryptor's BadStreak() for the given handle,
 * or -1 when the handle is zero.
 */
JNIEXPORT jint JNICALL
Java_com_rosetta_messenger_network_XChaCha20E2EE_nativeGetDecryptorBadStreak(
    JNIEnv *, jclass, jlong ptr)
{
    if (ptr != 0) {
        XChaCha20Decryptor *const decryptor = reinterpret_cast<XChaCha20Decryptor *>(ptr);
        return decryptor->BadStreak();
    }
    return -1;
}
|
|
|
|
} /* extern "C" */
|