@@ -146,6 +146,38 @@ def test_no_candidates_when_all_within_limit(self, test_db_manager):
146146 candidates = test_db_manager .get_cleanup_candidates (keep_per_species = 60 )
147147 assert len (candidates ) == 0
148148
149+ def test_returns_extra_column (self , test_db_manager ):
150+ """Cleanup candidates should include extra column for filename reconstruction."""
151+ import json
152+
153+ base_time = datetime (2024 , 1 , 15 , 10 , 0 , 0 )
154+ for i in range (70 ):
155+ detection = {
156+ 'timestamp' : (base_time - timedelta (hours = i )).isoformat (),
157+ 'group_timestamp' : (base_time - timedelta (hours = i )).isoformat (),
158+ 'scientific_name' : 'Testus birdus' ,
159+ 'common_name' : 'Test Bird' ,
160+ 'confidence' : 0.75 + (i % 20 ) * 0.01 ,
161+ 'latitude' : 40.7128 ,
162+ 'longitude' : - 74.0060 ,
163+ 'cutoff' : 0.5 ,
164+ 'sensitivity' : 0.75 ,
165+ 'overlap' : 0.25 ,
166+ 'extra' : {'source_label' : 'Backyard_Mic' },
167+ 'audio_source' : 'alsa_input.usb-test' ,
168+ }
169+ test_db_manager .insert_detection (detection )
170+
171+ candidates = test_db_manager .get_cleanup_candidates (keep_per_species = 60 )
172+ assert len (candidates ) == 10
173+
174+ for candidate in candidates :
175+ assert 'extra' in candidate , "extra column must be present in cleanup candidates"
176+ extra = candidate ['extra' ]
177+ if isinstance (extra , str ):
178+ extra = json .loads (extra )
179+ assert extra .get ('source_label' ) == 'Backyard_Mic'
180+
149181
150182class TestGetDiskUsage :
151183 """Tests for storage_manager.get_disk_usage()"""
@@ -193,6 +225,24 @@ def test_constructs_correct_paths(self):
193225 assert 'American_Robin' in paths ['spectrogram_path' ]
194226 assert '85' in paths ['audio_path' ] # Confidence as percentage
195227
228+ def test_constructs_paths_with_source_label (self ):
229+ """Should include source_label suffix in filenames when present in extra."""
230+ with patch ('config.settings.EXTRACTED_AUDIO_DIR' , '/app/data/audio/extracted_songs' ):
231+ with patch ('config.settings.SPECTROGRAM_DIR' , '/app/data/spectrograms' ):
232+ from core .storage_manager import get_detection_files
233+
234+ detection = {
235+ 'common_name' : 'American Robin' ,
236+ 'confidence' : 0.85 ,
237+ 'timestamp' : '2024-01-15T10:30:00' ,
238+ 'extra' : '{"source_label": "Backyard_Mic"}' ,
239+ }
240+
241+ paths = get_detection_files (detection )
242+
243+ assert paths ['audio_path' ].endswith ('_Backyard_Mic.mp3' )
244+ assert paths ['spectrogram_path' ].endswith ('_Backyard_Mic.webp' )
245+
196246 def test_fallback_to_legacy_colon_pattern (self ):
197247 """Should fall back to legacy colon-pattern files if dash-pattern not found."""
198248 with tempfile .TemporaryDirectory () as tmpdir :
@@ -461,3 +511,167 @@ def test_warns_when_target_unachievable(self, populated_db_for_cleanup):
461511 # Should flag that target is not achievable
462512 assert not result ['target_achievable' ]
463513 assert not result ['target_reached' ]
514+
515+ def test_cleanup_resolves_multi_source_filenames (self , test_db_manager ):
516+ """Cleanup should correctly resolve filenames for multi-source detections."""
517+ base_time = datetime (2024 , 1 , 15 , 10 , 0 , 0 )
518+
519+ for i in range (70 ):
520+ detection = {
521+ 'timestamp' : (base_time - timedelta (hours = i )).isoformat (),
522+ 'group_timestamp' : (base_time - timedelta (hours = i )).isoformat (),
523+ 'scientific_name' : 'Testus birdus' ,
524+ 'common_name' : 'Test Bird' ,
525+ 'confidence' : 0.75 + (i % 20 ) * 0.01 ,
526+ 'latitude' : 40.7128 ,
527+ 'longitude' : - 74.0060 ,
528+ 'cutoff' : 0.5 ,
529+ 'sensitivity' : 0.75 ,
530+ 'overlap' : 0.25 ,
531+ 'extra' : {'source_label' : 'Backyard_Mic' },
532+ 'audio_source' : 'alsa_input.usb-test' ,
533+ }
534+ test_db_manager .insert_detection (detection )
535+
536+ with patch ('config.settings.BASE_DIR' , '/tmp' ):
537+ with patch ('config.settings.EXTRACTED_AUDIO_DIR' , '/tmp/audio' ):
538+ with patch ('config.settings.SPECTROGRAM_DIR' , '/tmp/spectrograms' ):
539+ with patch ('config.settings.user_settings' , {'storage' : {}}):
540+ with patch ('core.storage_manager.get_disk_usage' ) as mock_usage :
541+ mock_usage .return_value = {
542+ 'total_bytes' : 100 * 1024 ** 3 ,
543+ 'used_bytes' : 90 * 1024 ** 3 ,
544+ 'free_bytes' : 10 * 1024 ** 3 ,
545+ 'percent_used' : 90.0
546+ }
547+
548+ with patch ('core.storage_manager.get_file_size' ) as mock_size :
549+ mock_size .return_value = 300 * 1024
550+
551+ with patch ('core.storage_manager.delete_detection_files' ) as mock_delete :
552+ mock_delete .return_value = {
553+ 'deleted_audio' : True ,
554+ 'deleted_spectrogram' : True ,
555+ 'bytes_freed' : 300 * 1024
556+ }
557+
558+ from core .storage_manager import cleanup_storage
559+
560+ result = cleanup_storage (
561+ test_db_manager ,
562+ target_percent = 80 ,
563+ keep_per_species = 60
564+ )
565+
566+ # Pre-fix: all would be skipped_missing because
567+ # filenames lacked the _Backyard_Mic suffix
568+ assert result ['skipped_missing' ] == 0
569+ assert result ['files_deleted' ] > 0
570+
571+ # Verify delete received detection with extra
572+ for call_args in mock_delete .call_args_list :
573+ det = call_args [0 ][0 ]
574+ assert 'extra' in det
575+
576+ def test_cleanup_fetches_candidates_once (self , test_db_manager ):
577+ """cleanup_storage should fetch candidates only once, not twice."""
578+ base_time = datetime (2024 , 1 , 15 , 10 , 0 , 0 )
579+ for i in range (70 ):
580+ detection = {
581+ 'timestamp' : (base_time - timedelta (hours = i )).isoformat (),
582+ 'group_timestamp' : (base_time - timedelta (hours = i )).isoformat (),
583+ 'scientific_name' : 'Testus birdus' ,
584+ 'common_name' : 'Test Bird' ,
585+ 'confidence' : 0.75 + (i % 20 ) * 0.01 ,
586+ 'latitude' : 40.7128 ,
587+ 'longitude' : - 74.0060 ,
588+ 'cutoff' : 0.5 ,
589+ 'sensitivity' : 0.75 ,
590+ 'overlap' : 0.25 ,
591+ }
592+ test_db_manager .insert_detection (detection )
593+
594+ with patch ('config.settings.BASE_DIR' , '/tmp' ):
595+ with patch ('config.settings.user_settings' , {'storage' : {}}):
596+ with patch ('core.storage_manager.get_disk_usage' ) as mock_usage :
597+ mock_usage .return_value = {
598+ 'total_bytes' : 100 * 1024 ** 3 ,
599+ 'used_bytes' : 90 * 1024 ** 3 ,
600+ 'free_bytes' : 10 * 1024 ** 3 ,
601+ 'percent_used' : 90.0
602+ }
603+
604+ with patch ('core.storage_manager.get_file_size' , return_value = 0 ):
605+ with patch .object (
606+ test_db_manager , 'get_cleanup_candidates' ,
607+ wraps = test_db_manager .get_cleanup_candidates
608+ ) as mock_gc :
609+ from core .storage_manager import cleanup_storage
610+
611+ cleanup_storage (
612+ test_db_manager ,
613+ target_percent = 80 ,
614+ keep_per_species = 60
615+ )
616+
617+ assert mock_gc .call_count == 1 , (
618+ f"get_cleanup_candidates called { mock_gc .call_count } times, expected 1"
619+ )
620+
621+ def test_target_reached_on_final_deletion (self , test_db_manager ):
622+ """target_reached should be True when final deletion crosses the threshold."""
623+ base_time = datetime (2024 , 1 , 15 , 10 , 0 , 0 )
624+ # Insert 62 detections so we get exactly 2 candidates with keep=60
625+ for i in range (62 ):
626+ detection = {
627+ 'timestamp' : (base_time - timedelta (hours = i )).isoformat (),
628+ 'group_timestamp' : (base_time - timedelta (hours = i )).isoformat (),
629+ 'scientific_name' : 'Testus birdus' ,
630+ 'common_name' : 'Test Bird' ,
631+ 'confidence' : 0.75 + (i % 20 ) * 0.01 ,
632+ 'latitude' : 40.7128 ,
633+ 'longitude' : - 74.0060 ,
634+ 'cutoff' : 0.5 ,
635+ 'sensitivity' : 0.75 ,
636+ 'overlap' : 0.25 ,
637+ }
638+ test_db_manager .insert_detection (detection )
639+
640+ with patch ('config.settings.BASE_DIR' , '/tmp' ):
641+ with patch ('config.settings.EXTRACTED_AUDIO_DIR' , '/tmp/audio' ):
642+ with patch ('config.settings.SPECTROGRAM_DIR' , '/tmp/spectrograms' ):
643+ with patch ('config.settings.user_settings' , {'storage' : {}}):
644+ # Disk: 81% used on 100MB total → need to free 1MB to reach 80%
645+ with patch ('core.storage_manager.get_disk_usage' ) as mock_usage :
646+ mock_usage .return_value = {
647+ 'total_bytes' : 100 * 1024 ** 2 ,
648+ 'used_bytes' : 81 * 1024 ** 2 ,
649+ 'free_bytes' : 19 * 1024 ** 2 ,
650+ 'percent_used' : 81.0
651+ }
652+
653+ with patch ('core.storage_manager.get_file_size' ) as mock_size :
654+ mock_size .return_value = 600 * 1024 # 600KB per file
655+
656+ with patch ('core.storage_manager.delete_detection_files' ) as mock_delete :
657+ mock_delete .return_value = {
658+ 'deleted_audio' : True ,
659+ 'deleted_spectrogram' : True ,
660+ 'bytes_freed' : 600 * 1024
661+ }
662+
663+ from core .storage_manager import cleanup_storage
664+
665+ result = cleanup_storage (
666+ test_db_manager ,
667+ target_percent = 80 ,
668+ keep_per_species = 60
669+ )
670+
671+ # 2 candidates, each frees 600KB = 1.2MB
672+ # bytes_to_free = 81MB - 80MB = 1MB
673+ # First deletion: 600KB < 1MB, loop continues
674+ # Second deletion: 1.2MB >= 1MB, but loop ends
675+ # Post-loop check should catch this
676+ assert result ['target_reached' ] is True
677+ assert result ['files_deleted' ] == 2
0 commit comments