zoe_encrypted_storage/
lib.rs

1//! # Convergent Encryption with Brotli Compression
2//!
3//! This crate provides convergent encryption for self-encrypting files for untrusted storage.
4//! It uses Blake3 for key derivation, AES-256-GCM for encryption, and optional Brotli compression.
5//!
6//! ## Key Features
7//!
8//! - **Convergent Encryption**: Same content always produces the same ciphertext
9//! - **Content-Based Key Derivation**: Encryption key is derived from file content using Blake3
10//! - **Optional Compression**: Brotli compression reduces storage requirements
11//! - **Deterministic**: Perfect for deduplication and integrity verification
12//! - **No Key Management**: No need to store or manage encryption keys separately
13//!
14//! ## How It Works
15//!
//! 1. **Compression** (optional): Content is compressed with Brotli, and the compressed form is kept only if it is smaller
//! 2. **Key Derivation**: The original (uncompressed) content is hashed with Blake3 to create a 32-byte encryption key
//! 3. **Encryption**: AES-256-GCM encrypts the data with the derived key; the first 12 bytes of that key are reused as the nonce
//! 4. **Metadata**: Compression status, original size, and the encryption key are returned alongside the ciphertext; the key and compression status are required for decryption
20//!
21//! ## Usage Example
22//!
23//! ```rust
24//! use zoe_encrypted_storage::{ConvergentEncryption, CompressionConfig};
25//!
26//! // Basic encryption with default settings
27//! let content = b"Hello, world!";
28//! let (encrypted, info) = ConvergentEncryption::encrypt(content).unwrap();
29//! let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
30//! assert_eq!(content, decrypted.as_slice());
31//!
32//! // Custom compression settings
33//! let config = CompressionConfig {
34//!     enabled: true,
35//!     quality: 8,      // Higher compression (0-11)
//!     min_size: 128,   // Only compress content of at least 128 bytes
37//! };
38//! let (encrypted, info) = ConvergentEncryption::encrypt_with_compression_config(content, config).unwrap();
39//! ```
40//!
41//! ## Security Considerations
42//!
43//! - **Convergent encryption reveals when identical files are stored**
44//! - **The encryption key is derived from content, so knowledge of content allows decryption**
45//! - **AES-256-GCM provides authenticated encryption**
46//! - **Blake3 provides cryptographically secure hashing**
47//!
48//! This approach is ideal for:
49//! - File deduplication systems
50//! - Content-addressable storage
51//! - Integrity verification
52//! - Untrusted storage where you control the content
53
54use aes_gcm::{
55    Aes256Gcm, Key, Nonce,
56    aead::{Aead, KeyInit},
57};
58use blake3::Hasher;
59use brotli::{CompressorWriter, Decompressor};
60use serde::{Deserialize, Serialize};
61use std::io::{Cursor, Read, Write};
62use thiserror::Error;
63
/// Errors produced by the convergent encryption, decryption, and compression
/// routines in this file.
#[derive(Debug, Error)]
pub enum ConvergentEncryptionError {
    /// The AES-256-GCM cipher reported an error while encrypting.
    #[error("Encryption failed: {0}")]
    EncryptionFailed(aes_gcm::Error),
    /// The AES-256-GCM cipher reported an error while decrypting.
    ///
    /// Presumably this covers a wrong key or a modified ciphertext, since GCM
    /// is an authenticated mode; confirm against the `aes_gcm` documentation.
    #[error("Decryption failed: {0}")]
    DecryptionFailed(aes_gcm::Error),
    /// Writing the content through the Brotli compressor failed; the payload
    /// is the underlying error rendered with `to_string()`.
    #[error("Compression failed: {0}")]
    CompressionFailed(String),
    /// Reading the content back through the Brotli decompressor failed; the
    /// payload is the underlying error rendered with `to_string()`.
    #[error("Decompression failed: {0}")]
    DecompressionFailed(String),
    /// Invalid key length provided.
    ///
    /// NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("Invalid key length")]
    InvalidKeyLength,
    /// Invalid nonce length provided.
    ///
    /// NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("Invalid nonce length")]
    InvalidNonceLength,
}
86
/// Encryption key derived from source content for convergent encryption.
///
/// This is the 32-byte Blake3 hash of the plaintext. All 32 bytes are used as
/// the AES-256-GCM key, and the first 12 of those bytes are additionally
/// reused as the AES-256-GCM nonce.
///
/// Decryption needs nothing secret beyond this value, so anyone holding it
/// can decrypt the matching ciphertext.
pub type ConvergentEncryptionKey = [u8; 32];
92
/// Metadata returned by encryption and required for decryption.
///
/// `ConvergentEncryption::decrypt` reads `key` and `was_compressed`;
/// `source_size` is recorded at encryption time but is not consulted by
/// `decrypt` in this file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ConvergentEncryptionInfo {
    /// The encryption key derived from the plaintext (its Blake3 hash).
    /// The first 12 bytes also serve as the AES-256-GCM nonce.
    pub key: ConvergentEncryptionKey,
    /// Whether the encrypted payload is the Brotli-compressed form of the
    /// content (`true`) or the raw content (`false`).
    pub was_compressed: bool,
    /// Length in bytes of the original plaintext, measured before compression
    /// and encryption.
    pub source_size: usize,
}
106
/// Configuration for Brotli compression settings.
///
/// Controls when and how compression is attempted during encryption.
/// Compression is attempted only when `enabled` is `true` and the content is
/// at least `min_size` bytes long; even then the compressed form is used only
/// if it is strictly smaller than the original.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressionConfig {
    /// Whether to enable compression. When `false`, content is always
    /// encrypted uncompressed.
    pub enabled: bool,
    /// Brotli compression quality (0-11, higher = better compression but slower).
    /// Passed through unchanged to the Brotli writer.
    pub quality: u32,
    /// Minimum size threshold for compression (bytes). Content shorter than
    /// this is never compressed; content of exactly this length is eligible.
    pub min_size: usize,
}
119
120impl Default for CompressionConfig {
121    fn default() -> Self {
122        Self {
123            enabled: true,
124            quality: 6,   // Good balance between speed and compression
125            min_size: 64, // Don't compress very small content
126        }
127    }
128}
129
/// Convergent encryption using AES-256-GCM with Blake3 key derivation and Brotli compression
///
/// This is a stateless namespace type: all functionality is exposed as
/// associated functions. The encryption key is derived from the content
/// itself using Blake3, which enables:
///
/// - **Deterministic encryption**: Same content (with the same compression
///   settings) always produces the same ciphertext
/// - **Content-based deduplication**: Identical files can be identified by their key
/// - **Re-derivable keys**: The key can be recomputed from the plaintext. To
///   decrypt *without* the plaintext, the returned
///   `ConvergentEncryptionInfo` (which contains the key) must be retained.
/// - **Integrity verification**: Can verify file integrity by re-deriving the key
/// - **Optional compression**: Brotli compression reduces storage requirements
///
/// ## Security Model
///
/// - The 32-byte Blake3 hash of the plaintext is the encryption key; its
///   first 12 bytes are reused as the nonce
/// - AES-256-GCM provides authenticated encryption with integrity protection
/// - Compression is applied before encryption to maximize storage efficiency
///
/// ## Limitations
///
/// - **Identical content detection**: Adversaries can determine when identical files are stored
/// - **No forward secrecy**: If the content is known, the encryption can be broken
/// - NOTE(review): the key and nonce depend only on the plaintext, while the
///   bytes handed to AES-GCM also depend on the `CompressionConfig`
///   (compressed vs. raw). Encrypting the same plaintext under different
///   configs therefore reuses one (key, nonce) pair for different messages,
///   which GCM is not designed to tolerate. Confirm that callers use a
///   single, fixed config per deployment.
///
/// This approach is ideal for scenarios where you control the content and want
/// to benefit from deduplication and integrity verification.
pub struct ConvergentEncryption;
156
157impl ConvergentEncryption {
158    /// Derive encryption key from source content using Blake3
159    ///
160    /// This function calculates a 32-byte encryption key from the content using Blake3.
161    /// The same content will always produce the same key, enabling convergent encryption.
162    ///
163    /// The derived key is used as both the encryption key and nonce for AES-256-GCM.
164    ///
165    /// # Arguments
166    ///
167    /// * `content` - The source content to derive the key from
168    ///
169    /// # Returns
170    ///
171    /// A 32-byte encryption key derived from the content
172    fn derive_key(content: &[u8]) -> ConvergentEncryptionKey {
173        let mut hasher = Hasher::new();
174        hasher.update(content);
175        *hasher.finalize().as_bytes()
176    }
177
178    /// Compress content using Brotli if beneficial
179    ///
180    /// Attempts to compress the content using Brotli. Only applies compression if:
181    /// - Compression is enabled in the config
182    /// - Content size is above the minimum threshold
183    /// - Compressed size is smaller than the original
184    ///
185    /// # Arguments
186    ///
187    /// * `content` - The content to potentially compress
188    /// * `config` - Compression configuration settings
189    ///
190    /// # Returns
191    ///
192    /// A tuple containing the data to encrypt (compressed or original) and a flag
193    /// indicating whether compression was applied.
194    fn compress(
195        content: &[u8],
196        config: &CompressionConfig,
197    ) -> Result<(Vec<u8>, bool), ConvergentEncryptionError> {
198        if !config.enabled || content.len() < config.min_size {
199            return Ok((content.to_vec(), false));
200        }
201        let mut compressed = Vec::new();
202        {
203            let mut compressor = CompressorWriter::new(&mut compressed, 4096, config.quality, 22);
204            compressor
205                .write_all(content)
206                .map_err(|e| ConvergentEncryptionError::CompressionFailed(e.to_string()))?;
207        }
208        if compressed.len() < content.len() {
209            Ok((compressed, true))
210        } else {
211            Ok((content.to_vec(), false))
212        }
213    }
214
215    /// Decompress content using Brotli
216    ///
217    /// Attempts to decompress the content using Brotli. This function assumes
218    /// the content was compressed and will return an error if decompression fails.
219    ///
220    /// # Arguments
221    ///
222    /// * `content` - The compressed content to decompress
223    ///
224    /// # Returns
225    ///
226    /// The decompressed content
227    fn decompress(content: &[u8]) -> Result<Vec<u8>, ConvergentEncryptionError> {
228        let mut decompressed = Vec::new();
229        let mut decompressor = Decompressor::new(Cursor::new(content), 4096);
230        decompressor
231            .read_to_end(&mut decompressed)
232            .map_err(|e| ConvergentEncryptionError::DecompressionFailed(e.to_string()))?;
233        Ok(decompressed)
234    }
235
236    /// Encrypt plaintext using convergent encryption with custom compression settings
237    ///
238    /// This function performs the complete encryption process:
239    /// 1. Derives the encryption key from the content using Blake3
240    /// 2. Compresses the content if beneficial (based on config)
241    /// 3. Encrypts the data using AES-256-GCM with the derived key
242    /// 4. Returns both the ciphertext and metadata for decryption
243    ///
244    /// The same plaintext will always produce the same ciphertext,
245    /// enabling convergent encryption and deduplication.
246    ///
247    /// # Arguments
248    ///
249    /// * `plaintext` - The content to encrypt
250    /// * `config` - Compression configuration settings
251    ///
252    /// # Returns
253    ///
254    /// A tuple containing the encrypted ciphertext and metadata needed for decryption
255    ///
256    /// # Example
257    ///
258    /// ```rust
259    /// use zoe_encrypted_storage::{ConvergentEncryption, CompressionConfig};
260    ///
261    /// let content = b"Hello, world!";
262    ///
263    /// // Encrypt with custom compression
264    /// let config = CompressionConfig {
265    ///     enabled: true,
266    ///     quality: 8,
267    ///     min_size: 128,
268    /// };
269    /// let (encrypted, info) = ConvergentEncryption::encrypt_with_compression_config(content, config).unwrap();
270    /// ```
271    pub fn encrypt_with_compression_config(
272        plaintext: &[u8],
273        config: CompressionConfig,
274    ) -> Result<(Vec<u8>, ConvergentEncryptionInfo), ConvergentEncryptionError> {
275        let key = Self::derive_key(plaintext);
276        let filesize = plaintext.len();
277
278        // Step 1: Compress if beneficial
279        let (data_to_encrypt, was_compressed) = Self::compress(plaintext, &config)?;
280
281        // Step 2: Encrypt the data (compressed or original)
282        let enc_key = Key::<Aes256Gcm>::from_slice(&key);
283        let nonce = Nonce::from_slice(&key[..12]); // Use first 12 bytes as nonce
284
285        let cipher = Aes256Gcm::new(enc_key);
286        let ciphertext = cipher
287            .encrypt(nonce, &*data_to_encrypt)
288            .map_err(ConvergentEncryptionError::EncryptionFailed)?;
289
290        Ok((
291            ciphertext,
292            ConvergentEncryptionInfo {
293                key,
294                was_compressed,
295                source_size: filesize,
296            },
297        ))
298    }
299
300    /// Decrypt ciphertext using the provided metadata with automatic decompression
301    ///
302    /// This function performs the complete decryption process:
303    /// 1. Decrypts the ciphertext using AES-256-GCM
304    /// 2. Decompresses the data if it was compressed during encryption
305    /// 3. Returns the original plaintext
306    ///
307    /// # Arguments
308    ///
309    /// * `ciphertext` - The encrypted data to decrypt
310    /// * `config` - Metadata containing the key and compression information
311    ///
312    /// # Returns
313    ///
314    /// The original plaintext content
315    ///
316    /// # Example
317    ///
318    /// ```rust
319    /// use zoe_encrypted_storage::ConvergentEncryption;
320    ///
321    /// let content = b"Hello, world!";
322    /// let (encrypted, info) = ConvergentEncryption::encrypt(content).unwrap();
323    /// let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
324    /// assert_eq!(content, decrypted.as_slice());
325    /// ```
326    pub fn decrypt(
327        ciphertext: &[u8],
328        config: &ConvergentEncryptionInfo,
329    ) -> Result<Vec<u8>, ConvergentEncryptionError> {
330        // Step 1: Decrypt
331        let enc_key = Key::<Aes256Gcm>::from_slice(&config.key);
332        let nonce = Nonce::from_slice(&config.key[..12]); // Use first 12 bytes as nonce
333        let cipher = Aes256Gcm::new(enc_key);
334        let decrypted_data = cipher
335            .decrypt(nonce, ciphertext)
336            .map_err(ConvergentEncryptionError::DecryptionFailed)?;
337        // Step 2: Decompress if needed
338        if config.was_compressed {
339            return Self::decompress(&decrypted_data);
340        }
341        Ok(decrypted_data)
342    }
343
344    /// Convenience function: encrypt content with default compression settings
345    ///
346    /// This function combines key derivation, compression, and encryption in one step
347    /// using the default compression configuration. It's the simplest way to encrypt
348    /// content with convergent encryption.
349    ///
350    /// # Arguments
351    ///
352    /// * `content` - The content to encrypt
353    ///
354    /// # Returns
355    ///
356    /// A tuple containing the encrypted ciphertext and metadata needed for decryption
357    ///
358    /// # Example
359    ///
360    /// ```rust
361    /// use zoe_encrypted_storage::ConvergentEncryption;
362    ///
363    /// let content = b"Hello, world!";
364    /// let (encrypted, info) = ConvergentEncryption::encrypt(content).unwrap();
365    /// let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
366    /// assert_eq!(content, decrypted.as_slice());
367    /// ```
368    pub fn encrypt(
369        content: &[u8],
370    ) -> Result<(Vec<u8>, ConvergentEncryptionInfo), ConvergentEncryptionError> {
371        Self::encrypt_with_compression_config(content, CompressionConfig::default())
372    }
373}
374
375#[cfg(test)]
376mod tests {
377    use super::*;
378
379    #[test]
380    fn test_key_derivation_deterministic() {
381        let content = b"Hello, convergent encryption!";
382
383        let key1 = ConvergentEncryption::derive_key(content);
384        let key2 = ConvergentEncryption::derive_key(content);
385
386        // Same content should produce identical keys
387        assert_eq!(key1, key2);
388    }
389
390    #[test]
391    fn test_encrypt_decrypt_with_key() {
392        let content = b"Test message for encryption/decryption";
393
394        let (encrypted, info) = ConvergentEncryption::encrypt_with_compression_config(
395            content,
396            CompressionConfig::default(),
397        )
398        .unwrap();
399        let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
400
401        assert_ne!(&encrypted[..], content);
402        assert_eq!(content, decrypted.as_slice());
403    }
404
405    #[test]
406    fn test_convergent_encryption_deterministic() {
407        let plaintext = b"Hello, convergent encryption!";
408
409        // Encrypt the same content twice using the convenience function
410        let (encrypted1, _info1) = ConvergentEncryption::encrypt(plaintext).unwrap();
411        let (encrypted2, _info2) = ConvergentEncryption::encrypt(plaintext).unwrap();
412
413        // Should produce identical ciphertext (convergent property)
414        assert_eq!(encrypted1, encrypted2);
415    }
416
417    #[test]
418    fn test_convergent_encryption_decrypt() {
419        let plaintext = b"Test message for decryption";
420
421        let (encrypted, info) = ConvergentEncryption::encrypt(plaintext).unwrap();
422        let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
423
424        assert_eq!(plaintext, decrypted.as_slice());
425    }
426
427    #[test]
428    fn test_file_encryption() {
429        let file_content = b"This is a test file content for convergent encryption";
430
431        let (encrypted, info) = ConvergentEncryption::encrypt(file_content).unwrap();
432
433        // Key should be consistent
434        let expected_key = ConvergentEncryption::derive_key(file_content);
435        assert_eq!(info.key, expected_key);
436
437        // Should be able to decrypt
438        let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
439        assert_eq!(file_content, decrypted.as_slice());
440    }
441
442    #[test]
443    fn test_different_content_produces_different_ciphertext() {
444        let content1 = b"First content";
445        let content2 = b"Second content";
446
447        let (encrypted1, info1) = ConvergentEncryption::encrypt(content1).unwrap();
448        let (encrypted2, info2) = ConvergentEncryption::encrypt(content2).unwrap();
449
450        // Different content should produce different ciphertext
451        assert_ne!(encrypted1, encrypted2);
452        assert_ne!(encrypted1, content1);
453        assert_ne!(encrypted2, content2);
454        assert_ne!(info1.key, info2.key);
455    }
456
457    #[test]
458    fn test_different_content_produces_different_keys() {
459        let content1 = b"First content";
460        let content2 = b"Second content";
461
462        let key1 = ConvergentEncryption::derive_key(content1);
463        let key2 = ConvergentEncryption::derive_key(content2);
464
465        // Different content should produce different keys
466        assert_ne!(key1, key2);
467    }
468
469    #[test]
470    fn test_key_can_be_used_for_deduplication() {
471        let content1 = b"Same content";
472        let content2 = b"Same content";
473        let content3 = b"Different content";
474
475        let key1 = ConvergentEncryption::derive_key(content1);
476        let key2 = ConvergentEncryption::derive_key(content2);
477        let key3 = ConvergentEncryption::derive_key(content3);
478
479        // Same content should have same key (for deduplication)
480        assert_eq!(key1, key2);
481
482        // Different content should have different keys
483        assert_ne!(key1, key3);
484        assert_ne!(key2, key3);
485    }
486
487    #[test]
488    fn test_compression_works() {
489        // Create compressible content (repeating pattern)
490        let compressible_content = b"Hello world! ".repeat(100);
491
492        let config = CompressionConfig {
493            enabled: true,
494            quality: 6,
495            min_size: 64,
496        };
497
498        let (encrypted, info) =
499            ConvergentEncryption::encrypt_with_compression_config(&compressible_content, config)
500                .unwrap();
501        let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
502
503        assert_eq!(compressible_content, decrypted.as_slice());
504    }
505
506    #[test]
507    fn test_compression_disabled() {
508        let content = b"Test content";
509
510        let config = CompressionConfig {
511            enabled: false,
512            quality: 6,
513            min_size: 64,
514        };
515
516        let (encrypted, info) =
517            ConvergentEncryption::encrypt_with_compression_config(content, config).unwrap();
518        let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
519
520        assert_eq!(content, decrypted.as_slice());
521    }
522
523    #[test]
524    fn test_compression_below_min_size() {
525        let small_content = b"Small";
526
527        let config = CompressionConfig {
528            enabled: true,
529            quality: 6,
530            min_size: 100, // Larger than content
531        };
532
533        let (encrypted, info) =
534            ConvergentEncryption::encrypt_with_compression_config(small_content, config).unwrap();
535        let decrypted = ConvergentEncryption::decrypt(&encrypted, &info).unwrap();
536
537        assert_eq!(small_content, decrypted.as_slice());
538    }
539
540    #[test]
541    fn test_compression_deterministic() {
542        let compressible_content = b"Repeating pattern ".repeat(50);
543
544        let config = CompressionConfig {
545            enabled: true,
546            quality: 6,
547            min_size: 64,
548        };
549
550        let (encrypted1, _info1) = ConvergentEncryption::encrypt_with_compression_config(
551            &compressible_content,
552            config.clone(),
553        )
554        .unwrap();
555        let (encrypted2, _info2) =
556            ConvergentEncryption::encrypt_with_compression_config(&compressible_content, config)
557                .unwrap();
558
559        // Should produce identical ciphertext even with compression
560        assert_eq!(encrypted1, encrypted2);
561    }
562}