1+ from datetime import datetime
2+ import json
3+ from unittest .mock import Mock , patch
4+
15from database .models import Dataset
2- from harvester .harvest import harvest_job_starter
6+ from harvester .harvest import HarvestSource , harvest_job_starter
37
48
59class TestHarvestJobSync :
@@ -88,3 +92,122 @@ def test_harvest_job_force_update(
8892 assert set (initial_harvest_record_ids ) == set (updated_harvest_record_ids )
8993 for slug , record_id in initial_harvest_record_ids .items ():
9094 assert updated_harvest_record_ids [slug ] != record_id
95+
def test_waf_force_harvest_skips_datetime_filter(
    self,
    interface,
    organization_data,
    source_data_waf_iso19115_2,
    iso19115_2_transform,
):
    """
    Force harvest for WAF sources should skip filter_waf_files_by_datetime
    to ensure all records are updated, not just those with newer modified dates.

    The test runs a regular harvest first, then a ``force_harvest`` job
    against the same source. WAF traversal, file download and the
    MDTranslator ``requests.post`` call are all patched, so no network
    access happens.
    """
    interface.add_organization(organization_data)
    interface.add_harvest_source(source_data_waf_iso19115_2)
    source_id = source_data_waf_iso19115_2["id"]

    # Mock external dependencies
    mock_waf_files = [
        {
            "identifier": f"{source_data_waf_iso19115_2['url']}file1.xml",
            "modified_date": datetime(2024, 1, 1),
        },
        {
            "identifier": f"{source_data_waf_iso19115_2['url']}file2.xml",
            "modified_date": datetime(2024, 1, 2),
        },
    ]

    # NOTE(review): the gmd/gco prefixes are not declared in this snippet.
    # That is presumably harmless because the translation step is mocked,
    # but confirm nothing namespace-parses the downloaded document.
    valid_xml = """<?xml version="1.0" encoding="UTF-8"?>
<gmi:MI_Metadata xmlns:gmi="http://www.isotc211.org/2005/gmi">
    <gmd:fileIdentifier>
        <gco:CharacterString>test-file</gco:CharacterString>
    </gmd:fileIdentifier>
</gmi:MI_Metadata>"""

    # Mock MDTranslator response
    mock_mdt_response = Mock()
    mock_mdt_response.status_code = 200
    mock_mdt_response.json.return_value = {
        "writerOutput": json.dumps(iso19115_2_transform),
        "readerStructureMessages": [],
        "readerValidationMessages": [],
    }

    with patch("harvester.harvest.traverse_waf", return_value=mock_waf_files), \
            patch("harvester.harvest.download_file", return_value=valid_xml), \
            patch("harvester.harvest.requests.post", return_value=mock_mdt_response):

        # Initial harvest
        harvest_job = interface.add_harvest_job(
            {
                "status": "new",
                "harvest_source_id": source_id,
            }
        )

        job_id = harvest_job.id
        job_type = harvest_job.job_type
        assert job_type == "harvest"
        harvest_job_starter(job_id, job_type)

        harvest_job = interface.get_harvest_job(job_id)
        job_err = interface.get_harvest_job_errors_by_job(job_id)
        record_err = interface.get_harvest_record_errors_by_job(job_id)

        assert len(job_err) == 0
        assert len(record_err) == 0
        assert harvest_job.status == "complete"

        initial_records_added = harvest_job.records_added
        assert initial_records_added > 0

        datasets_initial = interface.db.query(Dataset).all()
        assert len(datasets_initial) == initial_records_added
        initial_harvest_record_ids = {
            dataset.slug: dataset.harvest_record_id for dataset in datasets_initial
        }

        # Force harvest - should skip filter_waf_files_by_datetime
        with patch.object(
            HarvestSource, "filter_waf_files_by_datetime"
        ) as mock_filter:
            harvest_job = interface.add_harvest_job(
                {
                    "status": "new",
                    "harvest_source_id": source_id,
                    "job_type": "force_harvest",
                }
            )

            job_id = harvest_job.id
            job_type = harvest_job.job_type
            assert job_type == "force_harvest"
            harvest_job_starter(job_id, job_type)

            # Verify filter was NOT called
            mock_filter.assert_not_called()

        harvest_job = interface.get_harvest_job(job_id)
        # Re-query the errors for the force-harvest job. The lists fetched
        # earlier belong to the initial job and would make these checks
        # pass regardless of what the force harvest did.
        job_err = interface.get_harvest_job_errors_by_job(job_id)
        record_err = interface.get_harvest_record_errors_by_job(job_id)

        # Assert all records are resynced
        assert len(job_err) == 0
        assert len(record_err) == 0
        assert harvest_job.status == "complete"
        assert harvest_job.records_added == 0
        assert harvest_job.records_deleted == 0
        assert harvest_job.records_errored == 0
        assert harvest_job.records_ignored == 0
        assert harvest_job.records_total == initial_records_added
        assert harvest_job.records_updated == initial_records_added
        assert harvest_job.records_validated == initial_records_added

        datasets_after = interface.db.query(Dataset).all()
        assert len(datasets_after) == initial_records_added
        updated_harvest_record_ids = {
            dataset.slug: dataset.harvest_record_id for dataset in datasets_after
        }
        # Same set of datasets, but each must point at a new harvest record.
        assert set(initial_harvest_record_ids) == set(updated_harvest_record_ids)
        for slug, record_id in initial_harvest_record_ids.items():
            assert updated_harvest_record_ids[slug] != record_id
0 commit comments