Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions src/filler.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from pdfrw import PdfReader, PdfWriter
from pdfrw import PdfReader, PdfWriter, PdfName
from src.llm import LLM
from datetime import datetime


class Filler:
def __init__(self):
pass
Expand Down Expand Up @@ -39,11 +38,26 @@ def fill_form(self, pdf_form: str, llm: LLM):
for annot in sorted_annots:
if annot.Subtype == "/Widget" and annot.T:
if i < len(answers_list):
annot.V = f"{answers_list[i]}"
annot.AP = None
val = str(answers_list[i])

# CHECKBOX / RADIO BUTTON LOGIC
if annot.FT == "/Btn":
if val == "Yes":
# Set both value and Appearance State to 'Yes' or 'On'
# Most PDFs use /Yes, but some use /On. /Yes is the safest default.
annot.V = PdfName("Yes")
annot.AS = PdfName("Yes")
else:
annot.V = PdfName("Off")
annot.AS = PdfName("Off")

# STANDARD TEXT BOX LOGIC
else:
annot.V = f"{val}"
annot.AP = None # Refresh appearance for text

i += 1
else:
# Stop if we run out of answers
break

PdfWriter().write(output_pdf, pdf)
Expand Down
23 changes: 16 additions & 7 deletions src/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def build_prompt(self, current_field):

def main_loop(self):
# self.type_check_all()
for field in self._target_fields.keys():
for field in self._target_fields:
prompt = self.build_prompt(field)
# print(prompt)
# ollama_url = "http://localhost:11434/api/generate"
Expand Down Expand Up @@ -85,19 +85,28 @@ def main_loop(self):

def add_response_to_json(self, field, value):
    """
    Normalize one raw answer and store it under ``field`` in ``self._json``.

    Normalization rules, applied to the stripped, quote-free answer:

    * the literal ``"-1"`` is stored as ``None`` (presumably the model's
      "no answer found" marker -- confirm against the prompt);
    * true-like tokens (yes/true/x/checked/1, case-insensitive) become
      ``"Yes"``;
    * false-like tokens (no/false/unchecked/0, case-insensitive) become
      ``"Off"``;
    * anything else is kept as cleaned text.

    An answer containing ``;`` is handed to ``self.handle_plural_values``.
    If ``field`` already has an entry, the new value is appended to it
    (a scalar entry is promoted to a list first).

    Args:
        field: key in ``self._json`` to store the answer under.
        value: raw answer string.
    """
    cleaned = value.strip().replace('"', "")
    lowered = cleaned.lower()

    # "-1" must be tested before the boolean mapping: treating it as a
    # false-like token would store "Off" for every missing answer, including
    # answers destined for plain text fields.
    if cleaned == "-1":
        parsed_value = None
    elif lowered in ("yes", "true", "x", "checked", "1"):
        parsed_value = "Yes"
    elif lowered in ("no", "false", "unchecked", "0"):
        parsed_value = "Off"
    else:
        parsed_value = cleaned

    # Only split multi-valued answers when there is something to split;
    # pass the cleaned text so stray quotes do not leak into the parts.
    if ";" in cleaned and parsed_value:
        parsed_value = self.handle_plural_values(cleaned)

    if field in self._json:
        # Promote an existing scalar (or None) to a list before appending.
        if not isinstance(self._json[field], list):
            self._json[field] = [self._json[field]]
        self._json[field].append(parsed_value)
    else:
        self._json[field] = parsed_value
Expand Down
5 changes: 3 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from commonforms import prepare_form
from pypdf import PdfReader
from controller import Controller
from typing import Union

def input_fields(num_fields: int):
fields = []
Expand Down Expand Up @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
if __name__ == "__main__":
file = "./src/inputs/file.pdf"
user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is <Mamañema>, and the date is 01/02/2005"
fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
prepared_pdf = "temp_outfile.pdf"
prepare_form(file, prepared_pdf)

Expand All @@ -80,4 +81,4 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
num_fields = 0

controller = Controller()
controller.fill_form(user_input, fields, file)
controller.fill_form(user_input, descriptive_fields, file)