diff --git a/finetuning/adapters/pii-redactor-3b-varied/0000200_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0000200_adapters.safetensors new file mode 100644 index 0000000..cbe64dd Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0000200_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0000400_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0000400_adapters.safetensors new file mode 100644 index 0000000..1467818 Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0000400_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0000600_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0000600_adapters.safetensors new file mode 100644 index 0000000..c930b72 Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0000600_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0000800_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0000800_adapters.safetensors new file mode 100644 index 0000000..610ec9a Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0000800_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0001000_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0001000_adapters.safetensors new file mode 100644 index 0000000..6e380eb Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0001000_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0001200_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0001200_adapters.safetensors new file mode 100644 index 0000000..4a5ffe7 Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0001200_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0001400_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0001400_adapters.safetensors new file mode 100644 index 0000000..955a67c Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0001400_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0001600_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0001600_adapters.safetensors new file mode 100644 index 0000000..38d1553 Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0001600_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0001800_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0001800_adapters.safetensors new file mode 100644 index 0000000..e4ffd9b Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0001800_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/0002000_adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/0002000_adapters.safetensors new file mode 100644 index 0000000..05ccec1 Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/0002000_adapters.safetensors differ diff --git a/finetuning/adapters/pii-redactor-3b-varied/adapter_config.json b/finetuning/adapters/pii-redactor-3b-varied/adapter_config.json new file mode 100644 index 0000000..e8198f6 --- /dev/null +++ b/finetuning/adapters/pii-redactor-3b-varied/adapter_config.json @@ -0,0 +1,40 @@ +{ + "adapter_path": "adapters/pii-redactor-3b-varied", + "batch_size": 2, + "config": null, + "data": "data", + "fine_tune_type": "lora", + "grad_accumulation_steps": 1, + "grad_checkpoint": false, + "iters": 2000, + "learning_rate": 1e-05, + "lora_parameters": { + "rank": 8, + "dropout": 0.0, + "scale": 20.0 + }, + "lr_schedule": null, + "mask_prompt": false, + "max_seq_length": 2048, + "model": "mlx-community/Llama-3.2-3B-Instruct-4bit", + "num_layers": 16, + "optimizer": "adam", + "optimizer_config": { + "adam": {}, + "adamw": {}, + "muon": {}, + "sgd": {}, + "adafactor": {} + }, + "project_name": null, + "report_to": null, + "resume_adapter_file": null, + "save_every": 200, + "seed": 0, + "steps_per_eval": 100, + "steps_per_report": 10, + "test": false, + "test_batches": 500, + "train": true, + "val_batches": 25 +} \ No newline at end of file diff --git a/finetuning/adapters/pii-redactor-3b-varied/adapters.safetensors b/finetuning/adapters/pii-redactor-3b-varied/adapters.safetensors new file mode 100644 index 0000000..05ccec1 Binary files /dev/null and b/finetuning/adapters/pii-redactor-3b-varied/adapters.safetensors differ diff --git a/finetuning/redact.py b/finetuning/redact.py index a682090..6a5fddc 100755 --- a/finetuning/redact.py +++ b/finetuning/redact.py @@ -6,7 +6,7 @@ print("Loading model...", file=sys.stderr) model, tokenizer = load( "mlx-community/Llama-3.2-3B-Instruct-4bit", - adapter_path="adapters/pii-redactor-3b" + adapter_path="adapters/pii-redactor-3b-varied" ) # Get input @@ -130,16 +130,19 @@ ALWAYS REMEMBER: never summarize a ticket. redact per instructions but never summarize or shorten excessively -You must output in JSON format with: -{ - "redacted_text": "the fully redacted text", - "properties_redacted": { - "lastname1": "original last name", - "email1": "original email", - "ip1": "original IP", - ... - } -}""" + You must output in JSON format with: + { + "redacted_text": "the fully redacted text", + "properties_redacted": { + "lastname1": "original last name", + "email1": "original email", + "ip1": "original IP", + ... + } + } + Do not include empty fields. Only include properties that were actually found and redacted. + +""" messages = [ {"role": "system", "content": system_prompt},