# MPEG-TS framing: every transport-stream packet is TS_PACKET_SIZE bytes long
# and begins with the single sync byte 0x47.
TS_SYNC_BYTE = b'\x47'
TS_PACKET_SIZE = 188

# Common file magic bytes for detecting anti-hotlink responses.
# Lengths matter: callers compare these against fixed-size prefixes
# (data[:3] for JPEG, data[:4] for PNG and GIF), so each literal must be
# exactly that long with no padding bytes.
JPEG_MAGIC = b'\xff\xd8\xff'
PNG_MAGIC = b'\x89PNG'
GIF_MAGIC = b'GIF8'
29+
2530
2631class SegmentDownloader :
2732 """Download video segments with multi-threading and retry logic"""
@@ -56,22 +61,30 @@ def __init__(
5661 # Create output directory
5762 self .output_dir .mkdir (parents = True , exist_ok = True )
5863
59- def _is_valid_ts_content (self , data : bytes ) -> bool :
64+ def _is_valid_ts_content (self , data : bytes ) -> tuple [ bool , str ] :
6065 """
6166 Validate if the content is a valid MPEG-TS file.
62- Returns False if it looks like HTML/text error page .
67+ Returns (is_valid, error_reason) tuple .
6368 """
6469 if not data or len (data ) < TS_PACKET_SIZE :
65- return False
70+ return False , "Content too small"
71+
72+ # Check for image files (anti-hotlinking protection)
73+ if data [:3 ] == JPEG_MAGIC :
74+ return False , "Server returned JPEG image (anti-hotlinking protection)"
75+ if data [:4 ] == PNG_MAGIC :
76+ return False , "Server returned PNG image (anti-hotlinking protection)"
77+ if data [:4 ] == GIF_MAGIC :
78+ return False , "Server returned GIF image (anti-hotlinking protection)"
6679
6780 # Check if it starts with HTML (error page)
6881 if data [:5 ].lower () in (b'<!doc' , b'<html' , b'<?xml' ):
69- return False
82+ return False , "Server returned HTML error page"
7083
7184 # Check for common error text patterns
7285 lower_start = data [:500 ].lower ()
7386 if b'error' in lower_start or b'forbidden' in lower_start or b'denied' in lower_start :
74- return False
87+ return False , "Server returned error response"
7588
7689 # Check for TS sync byte at expected positions
7790 # TS packets are 188 bytes, sync byte should appear at 0, 188, 376, etc.
@@ -81,32 +94,62 @@ def _is_valid_ts_content(self, data: bytes) -> bool:
8194 sync_count += 1
8295
8396 # If we found sync bytes at expected positions, it's likely valid
84- return sync_count >= 2
97+ if sync_count >= 2 :
98+ return True , ""
99+
100+ return False , "Invalid TS format (no sync bytes found)"
85101
86102 def _decrypt_segment (self , data : bytes , segment_index : int ) -> bytes :
87103 """Decrypt AES-128 encrypted segment"""
88104 if not self .encryption_key :
89105 return data
90106
91- try :
92- # Use provided IV or derive from segment index
93- if self .encryption_iv :
94- iv = self .encryption_iv
107+ # Log key info on first segment
108+ if segment_index == 0 :
109+ logger .info (f"Encryption key (first 4 bytes): { self .encryption_key [:4 ].hex ()} " )
110+ if self .encryption_iv is not None :
111+ logger .info (f"Using provided IV: { self .encryption_iv .hex ()} " )
95112 else :
96- # Default IV is segment sequence number as 16-byte big-endian
97- iv = segment_index .to_bytes (16 , byteorder = 'big' )
113+ logger .info ("No IV provided, will use segment index" )
114+
115+ try :
116+ # Try multiple IV strategies
117+ iv_strategies = []
98118
99- cipher = AES .new (self .encryption_key , AES .MODE_CBC , iv )
100- decrypted = cipher .decrypt (data )
119+ # Strategy 1: Use provided IV if specified (HLS spec compliant)
120+ if self .encryption_iv is not None :
121+ iv_strategies .append (("provided IV" , self .encryption_iv ))
101122
102- # Remove PKCS7 padding
103- try :
104- decrypted = unpad (decrypted , AES .block_size )
105- except ValueError :
106- # Some streams don't use proper padding
107- pass
123+ # Strategy 2: Use segment index as IV (common non-compliant streams)
124+ iv_strategies .append (("segment index IV" , segment_index .to_bytes (16 , byteorder = 'big' )))
125+
126+ # Strategy 3: Use zeros IV if not already tried
127+ if self .encryption_iv is None or self .encryption_iv != bytes (16 ):
128+ iv_strategies .append (("zeros IV" , bytes (16 )))
129+
130+ for strategy_name , iv in iv_strategies :
131+ cipher = AES .new (self .encryption_key , AES .MODE_CBC , iv )
132+ decrypted = cipher .decrypt (data )
133+
134+ # Remove PKCS7 padding
135+ try :
136+ decrypted = unpad (decrypted , AES .block_size )
137+ except ValueError :
138+ # Some streams don't use proper padding
139+ pass
140+
141+ # Check if decryption produced valid TS data
142+ if decrypted [:1 ] == TS_SYNC_BYTE :
143+ if segment_index < 3 : # Log first few segments
144+ logger .info (f"Segment { segment_index } : Decryption successful with { strategy_name } " )
145+ return decrypted
108146
147+ # None of the strategies worked
148+ logger .warning (f"Segment { segment_index } : All decryption strategies failed (first byte after zeros IV: { hex (decrypted [0 ]) if decrypted else 'empty' } )" )
149+
150+ # Return the last decrypted result (with zeros IV) - let ffmpeg try to handle it
109151 return decrypted
152+
110153 except Exception as e :
111154 logger .warning (f"Decryption failed for segment { segment_index } : { e } " )
112155 return data # Return original data if decryption fails
@@ -133,6 +176,11 @@ def download_segment(
133176 try :
134177 logger .debug (f"Downloading segment { index } : { url } " )
135178
179+ # Log headers for first segment to help debug anti-hotlink issues
180+ if index == 0 and retry_count == 0 :
181+ logger .info (f"Segment download headers: { self .headers } " )
182+ logger .info (f"First segment URL: { url } " )
183+
136184 response = self .session .get (
137185 url ,
138186 headers = self .headers ,
@@ -161,12 +209,23 @@ def download_segment(
161209 content = self ._decrypt_segment (content , index )
162210
163211 # Validate content is actually a TS file (not an error page)
164- if not self ._is_valid_ts_content (content ):
165- # Log first 200 bytes for debugging
166- preview = content [:200 ]
167- logger .error (f"Segment { index } content is not valid TS data" )
168- logger .error (f"Content preview (first 200 bytes): { preview } " )
169- raise ValueError (f"Invalid TS content - possibly HTML error page or encrypted data" )
212+ is_valid , error_reason = self ._is_valid_ts_content (content )
213+ if not is_valid :
214+ # Check if this looks like encrypted data that we couldn't decrypt
215+ # In that case, still save it and let ffmpeg try to handle it
216+ skip_validation = os .environ .get ('SKIP_TS_VALIDATION' , 'false' ).lower () == 'true'
217+
218+ # Always skip validation for encrypted streams where decryption produced non-image data
219+ if self .encryption_key and not content [:3 ] in (JPEG_MAGIC , PNG_MAGIC [:3 ], GIF_MAGIC [:3 ]):
220+ logger .warning (f"Segment { index } : { error_reason } - saving anyway for ffmpeg to process" )
221+ elif skip_validation :
222+ logger .warning (f"Segment { index } : { error_reason } - validation skipped" )
223+ else :
224+ # Log first 200 bytes for debugging
225+ preview = content [:200 ]
226+ logger .error (f"Segment { index } : { error_reason } " )
227+ logger .error (f"Content preview (first 200 bytes): { preview } " )
228+ raise ValueError (error_reason )
170229
171230 # Write validated content to file
172231 with open (output_path , 'wb' ) as f :
0 commit comments