Skip to content

Commit

Permalink
Voice client optimizations
Browse files Browse the repository at this point in the history
- disco.voice.udp: follow nonce format used by Discord for `xsalsa20_poly1305_suffix`
- disco.voice.udp: account for the 12-byte nonce of AES256 on decrypt
- disco.voice.udp: account for different header size of `_rtpsize` modes on decrypt
  • Loading branch information
elderlabs committed Apr 18, 2024
1 parent d988d6a commit c18845d
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 21 deletions.
13 changes: 7 additions & 6 deletions disco/voice/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class VoiceClient(LoggingClass):

SUPPORTED_MODES = {
'aead_aes256_gcm_rtpsize',
'aead_aes256_gcm',
'aead_xchacha20_poly1305_rtpsize',
'xsalsa20_poly1305_lite_rtpsize',
'xsalsa20_poly1305_lite',
Expand All @@ -70,7 +71,7 @@ def __init__(self, client, server_id, is_dm=False, max_reconnects=5, encoder='js
self.server_id = server_id
self.channel_id = None
self.is_dm = is_dm
self.encoder = encoder or JSONEncoder
self.encoder = JSONEncoder
self.max_reconnects = max_reconnects
self.video_enabled = False
self.media = None
Expand Down Expand Up @@ -164,7 +165,7 @@ def ssrc_rtcp(self):
return self.ssrc + 3

def set_state(self, state):
self.log.debug('[{}] state {} -> {}'.format(self.channel_id, self.state, state))
self.log.debug('[{}] state {} -> {}'.format(self.channel_id or '-', self.state, state))
prev_state = self.state
self.state = state
self.state_emitter.emit(state, prev_state)
Expand Down Expand Up @@ -391,7 +392,7 @@ def on_voice_speaking(self, data):
priority=bool(data['speaking'] & SpeakingFlags.PRIORITY),
)

self.client.gw.events.emit('VoiceSpeaking', payload)
self.client.events.emit('VoiceSpeaking', payload)

def on_message(self, msg):
try:
Expand Down Expand Up @@ -469,7 +470,7 @@ def on_close(self, code=None, reason=None):
self.log.warning(f'[{self.channel_id}] Session invalidated. Spawning fresh connection to channel.')
return self.connect(self.channel_id, mute=self.mute, deaf=self.deaf, video=self.video_enabled)

wait_time = 0
wait_time = (self._reconnects * 5) - 5

self.log.info('[{}] {} in {} second{}'.format(self.channel_id, 'Resuming' if self._identified else 'Reconnecting', wait_time, 's' if wait_time != 1 else ''))
gevent_sleep(wait_time)
Expand All @@ -490,7 +491,7 @@ def connect(self, channel_id, timeout=10, **kwargs):
if self.state == VoiceState.CONNECTED:
self.log.debug('[{}] Moving to channel {}'.format(self.channel_id, channel_id))
else:
self.log.debug('[{}] Attempting connection to channel id {}'.format(self.channel_id, channel_id))
self.log.debug('[{}] Attempting connection to channel id {}'.format(self.channel_id or '-', channel_id))
self.set_state(VoiceState.AWAITING_ENDPOINT)

self.set_voice_state(channel_id, **kwargs)
Expand Down Expand Up @@ -532,7 +533,7 @@ def disconnect(self):
if self.client.state.voice_states.get(self._session_id):
del self.client.state.voice_states[self._session_id]

return self.client.gw.events.emit('VoiceDisconnect', self)
return self.client.events.emit('VoiceDisconnect', self)

def send_frame(self, *args, **kwargs):
self.udp.send_frame(*args, **kwargs)
Expand Down
41 changes: 26 additions & 15 deletions disco/voice/udp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

try:
from nacl.secret import SecretBox
from nacl.utils import random as nacl_random
except ImportError:
warnings_warn('nacl is not installed, voice support is disabled')

Expand Down Expand Up @@ -114,7 +113,7 @@ def increment_timestamp(self, by):
def setup_encryption(self, encryption_key):
if 'xsalsa20' in self.vc.mode:
self._secret_box = SecretBox(encryption_key)
elif self.vc.mode in ('aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm_rtpsize'):
elif self.vc.mode in ('aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm', 'aead_aes256_gcm_rtpsize'):
self._secret_box = AEScrypt(encryption_key, self.vc.mode)

def send_frame(self, frame, sequence=None, timestamp=None, incr_timestamp=None):
Expand All @@ -123,21 +122,20 @@ def send_frame(self, frame, sequence=None, timestamp=None, incr_timestamp=None):
struct_pack_into('>I', self._rtp_audio_header, 4, timestamp or self.timestamp) # BE, unsigned int
struct_pack_into('>i', self._rtp_audio_header, 8, self.vc.ssrc_audio) # BE, int

if self.vc.mode == 'aead_aes256_gcm_rtpsize':
if self.vc.mode in ('aead_aes256_gcm', 'aead_aes256_gcm_rtpsize'):
nonce = bytearray(12) # 96-bits
else:
nonce = bytearray(24) # 192-bits is 24 bytes

if self.vc.mode in ('xsalsa20_poly1305_lite', 'xsalsa20_poly1305_lite_rtpsize', 'aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm_rtpsize'):
if self.vc.mode != 'xsalsa20_poly1305':
# Use an incrementing number as a nonce; only the first 4 bytes of the nonce are padded on
self._nonce += 1
if self._nonce > MAX_UINT32:
self._nonce = 0
struct_pack_into('>I', nonce, 0, self._nonce) # BE, unsigned int
if self.vc.mode in ('xsalsa20_poly1305_lite', 'xsalsa20_poly1305_lite_rtpsize', 'aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm', 'aead_aes256_gcm_rtpsize'):
nonce_padding = nonce[:4]
elif self.vc.mode == 'xsalsa20_poly1305_suffix':
# Generate a nonce
nonce = nacl_random(SecretBox.NONCE_SIZE)
nonce_padding = nonce
elif self.vc.mode == 'xsalsa20_poly1305':
# Nonce is the header
Expand All @@ -147,7 +145,7 @@ def send_frame(self, frame, sequence=None, timestamp=None, incr_timestamp=None):
raise Exception('Voice mode `{}` is not supported.'.format(self.vc.mode))

# Encrypt the payload with the nonce
if self.vc.mode in ('aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm_rtpsize'):
if self.vc.mode in ('aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm', 'aead_aes256_gcm_rtpsize'):
payload = self._secret_box.encrypt(plaintext=frame, nonce=bytes(nonce), aad=bytes(self._rtp_audio_header))
else:
payload = self._secret_box.encrypt(plaintext=frame, nonce=bytes(nonce))
Expand Down Expand Up @@ -210,7 +208,7 @@ def run(self):
data=data[8:],
)

self.vc.client.gw.events.emit('RTCPData', payload)
self.vc.client.events.emit('RTCPData', payload)
else:
sequence, timestamp, ssrc = struct_unpack_from('>HII', data, 2) # BE, unsigned short, 2x unsigned int

Expand Down Expand Up @@ -238,7 +236,11 @@ def run(self):
self.log.debug('[{}] [VoiceData] Received unsupported payload type, {}'.format(self.vc.channel_id, rtp.payload_type))
continue

nonce = bytearray(24)
if self.vc.mode in ('aead_aes256_gcm', 'aead_aes256_gcm_rtpsize'):
nonce = bytearray(12) # 96-bits
else:
nonce = bytearray(24) # 192-bits is 24 bytes

if self.vc.mode in ('xsalsa20_poly1305_lite', 'xsalsa20_poly1305_lite_rtpsize', 'aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm', 'aead_aes256_gcm_rtpsize'):
nonce[:4] = data[-4:]
data = data[:-4]
Expand All @@ -251,13 +253,22 @@ def run(self):
self.log.debug('[{}] [VoiceData] Unsupported Encryption Mode, {}'.format(self.vc.channel_id, self.vc.mode))
continue

header_size = 12
if '_rtpsize' in self.vc.mode:
header_size += (rtp.csrc_count * 4)
if rtp.extension:
header_size += 4
ctxt = data[header_size:] # plus strip whatever additional bs is before the payload
else:
ctxt = data[12:]

try:
if self.vc.mode in ('aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm_rtpsize'):
data = self._secret_box.decrypt(ciphertext=bytes(data[12:]), nonce=bytes(nonce), aad=bytes(rtp))
if self.vc.mode in ('aead_xchacha20_poly1305_rtpsize', 'aead_aes256_gcm', 'aead_aes256_gcm_rtpsize'):
data = self._secret_box.decrypt(ciphertext=bytes(ctxt), nonce=bytes(nonce), aad=bytes(data[:header_size]))
else:
data = self._secret_box.decrypt(ciphertext=bytes(data[12:]), nonce=bytes(nonce))
except Exception:
self.log.debug('[{}] [VoiceData] Failed to decode data from ssrc {}'.format(self.vc.channel_id, rtp.ssrc))
data = self._secret_box.decrypt(ciphertext=bytes(ctxt), nonce=bytes(nonce))
except Exception as e:
self.log.debug('[{}] [VoiceData] Failed to decode data from ssrc {}: {} - {}'.format(self.vc.channel_id, rtp.ssrc, e.__class__.__name__, e))
continue

# RFC3550 Section 5.1 (Padding)
Expand Down Expand Up @@ -313,7 +324,7 @@ def run(self):
data=data,
)

self.vc.client.gw.events.emit('VoiceData', payload)
self.vc.client.events.emit('VoiceData', payload)

def send(self, data):
self.conn.sendto(data, (self.ip, self.port))
Expand Down

0 comments on commit c18845d

Please sign in to comment.