diff --git a/definitions/reporting/test_predictions.ipynb b/definitions/reporting/test_predictions.ipynb
new file mode 100644
index 0000000..d7ec89f
--- /dev/null
+++ b/definitions/reporting/test_predictions.ipynb
@@ -0,0 +1,1123 @@
+### <font color='#4285f4'>Overview</font>
+Overview: This demo will show how to use the RAG (retrieval augmented generation) pattern directly within BigQuery on a set of PDFs (unstructured data).
+
+Process Flow:
+1. Create a storage account and copy seed data
+2. Populate BigQuery
+3. Create the external connections (Gemini, Vertex AI Embeddings, Vertex AI Document Processors, BigLake)
+4. Set the IAM permissions
+5. Create an object table
+6. Extract the text from each PDF using Vertex AI Document Processor
+7. Create embeddings using Vertex AI Text Embeddings
+8. Use Gemini by searching the embeddings (Vector Search) and injecting them into the context window
+9. Use Gemini with results Grounded with Google Search
+10. Use Gemini with “Response Schema” which will return our response in a structured format (typically JSON, but in BigQuery a table will be returned)
+
+
+
+Cost:
+* Approximate cost: Less than $1
+
+Author:
+* Adam Paternostro
+# Architecture Diagram
+from IPython.display import Image
+Image(url='https://storage.googleapis.com/data-analytics-golden-demo/colab-diagrams/BigQuery-Analytics-using-GenAI.png', width=1200)
+### <font color='#4285f4'>Video Walkthrough</font>
+[![Video](https://storage.googleapis.com/data-analytics-golden-demo/colab-videos/BigQuery-Analytics-with-GenAI.png)](https://storage.googleapis.com/data-analytics-golden-demo/colab-videos/BigQuery-Analytics-with-GenAI.mp4)
+
+
+from IPython.display import HTML
+
+HTML("""
+<video width="800" height="600" controls>
+  <source src="https://storage.googleapis.com/data-analytics-golden-demo/colab-videos/BigQuery-Analytics-with-GenAI.mp4" type="video/mp4">
+  Your browser does not support the video tag.
+</video>
+""")
+### <font color='#4285f4'>License</font>
+```
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+```
+### <font color='#4285f4'>Initialize</font>
+from PIL import Image
+from IPython.display import HTML
+import IPython.display
+import google.auth
+import requests
+import json
+import uuid
+import base64
+import os
+import cv2
+import random
+import time
+import datetime
+import base64
+import random
+import logging
+# Notebook-wide settings (run this cell and check the printed values)
+
+# Change to "us", "eu", etc.
+location = "us"
+
+# Get the current date and time
+now = datetime.datetime.now()
+
+# Timestamp formatted as YYYY-MM-DD-HH-MM
+formatted_date = now.strftime("%Y-%m-%d-%H-%M")
+
+# The project comes from the environment; the active account comes from gcloud
+project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
+user = !(gcloud auth list --filter=status:ACTIVE --format="value(account)")
+
+# The shell capture is a list of output lines; exactly one active account is expected
+if len(user) != 1:
+  raise RuntimeError(f"user is not set: {user}")
+user = user[0]
+
+print(f"project_id = {project_id}")
+print(f"user = {user}")
+### <font color='#4285f4'>Prerequisite to run this notebook</font>
+!gcloud services enable compute.googleapis.com \
+                        bigquery.googleapis.com \
+                        aiplatform.googleapis.com \
+                        dataform.googleapis.com \
+                        documentai.googleapis.com \
+    --project "{project_id}"
+### <font color='#4285f4'>Helper Methods</font>
+##### restAPIHelper
+Calls the Google Cloud REST API using the current user's credentials.
+def restAPIHelper(url: str, http_verb: str, request_body: str) -> str:
+  """Calls the Google Cloud REST API passing in the current users credentials"""
+
+  import requests
+  import google.auth
+  import json
+
+  # Get an access token based upon the current user
+  creds, project = google.auth.default()
+  auth_req = google.auth.transport.requests.Request()
+  creds.refresh(auth_req)
+  access_token=creds.token
+
+  headers = {
+    "Content-Type" : "application/json",
+    "Authorization" : "Bearer " + access_token
+  }
+
+  if http_verb == "GET":
+    response = requests.get(url, headers=headers)
+  elif http_verb == "POST":
+    response = requests.post(url, json=request_body, headers=headers)
+  elif http_verb == "PUT":
+    response = requests.put(url, json=request_body, headers=headers)
+  elif http_verb == "PATCH":
+    response = requests.patch(url, json=request_body, headers=headers)
+  elif http_verb == "DELETE":
+    response = requests.delete(url, headers=headers)
+  else:
+    raise RuntimeError(f"Unknown HTTP verb: {http_verb}")
+
+  if response.status_code == 200:
+    return json.loads(response.content)
+    #image_data = json.loads(response.content)["predictions"][0]["bytesBase64Encoded"]
+  else:
+    error = f"Error restAPIHelper -> ' Status: '{response.status_code}' Text: '{response.text}'"
+    raise RuntimeError(error)
+##### RunQuery (on BigQuery)
+def RunQuery(sql):
+  import time
+  from google.cloud import bigquery
+  client = bigquery.Client()
+
+  if (sql.startswith("SELECT") or sql.startswith("WITH")):
+      df_result = client.query(sql).to_dataframe()
+      return df_result
+  else:
+    job_config = bigquery.QueryJobConfig(priority=bigquery.QueryPriority.INTERACTIVE)
+    query_job = client.query(sql, job_config=job_config)
+
+    # Check on the progress by getting the job's updated state.
+    query_job = client.get_job(
+        query_job.job_id, location=query_job.location
+    )
+    print("Job {} is currently in state {} with error result of {}".format(query_job.job_id, query_job.state, query_job.error_result))
+
+    while query_job.state != "DONE":
+      time.sleep(2)
+      query_job = client.get_job(
+          query_job.job_id, location=query_job.location
+          )
+      print("Job {} is currently in state {} with error result of {}".format(query_job.job_id, query_job.state, query_job.error_result))
+
+    if query_job.error_result == None:
+      return True
+    else:
+      raise Exception(query_job.error_result)
+##### Create Vertex AI connection
+def createExternal_BigLake_VertexAI_RemoteFunctions_Connection(project_id, location, connection_name):
+  """Creates a BigLake, Vertex AI, Remote Function connection."""
+
+  # First find the connection
+  # https://cloud.google.com/bigquery/docs/reference/bigqueryconnection/rest/v1/projects.locations.connections/list
+  url = f"https://bigqueryconnection.googleapis.com/v1/projects/{project_id}/locations/{location}/connections"
+
+  # Gather existing connections
+  json_result = restAPIHelper(url, "GET", None)
+  print(f"createBigLakeConnection (GET) json_result: {json_result}")
+
+  # Test to see if connection exists, if so return
+  if "connections" in json_result:
+    for item in json_result["connections"]:
+      print(f"BigLake Connection: {item['name']}")
+      # "projects/756740881369/locations/us/connections/biglake-notebook-connection"
+      # NOTE: We cannot test the complete name since it contains the project number and not id
+      if item["name"].endswith(f"/locations/{location}/connections/{connection_name}"):
+        print("Connection already exists")
+        serviceAccountId = item["cloudResource"]["serviceAccountId"]
+        return serviceAccountId
+
+  # Create the connection
+  # https://cloud.google.com/bigquery/docs/reference/bigqueryconnection/rest/v1/projects.locations.connections/create
+  print("Creating Vertex AI Connection")
+
+  url = f"https://bigqueryconnection.googleapis.com/v1/projects/{project_id}/locations/{location}/connections?connectionId={connection_name}"
+
+  request_body = {
+      "friendlyName": connection_name,
+      "description": "BigLake, Vertex AI, Remote Function connection",
+      "cloudResource": {}
+  }
+
+  json_result = restAPIHelper(url, "POST", request_body)
+
+  serviceAccountId = json_result["cloudResource"]["serviceAccountId"]
+  print("BigLake Connection created: ", serviceAccountId)
+  return serviceAccountId
+
+##### Create a GCS bucket
+def createGoogleCloudStorageBucket(project_id, bucket_name, location):
+  """Creates a Google Cloud Storage bucket."""
+
+  # First find the bucket
+  # https://cloud.google.com/storage/docs/json_api/v1/buckets/list
+  url = f"https://storage.googleapis.com/storage/v1/b?project={project_id}"
+
+  # Gather existing buckets
+  json_result = restAPIHelper(url, "GET", None)
+  print(f"createGoogleCloudStorageBucket (GET) json_result: {json_result}")
+
+  # Test to see if connection exists, if so return
+  if "items" in json_result:
+    for item in json_result["items"]:
+      print(f"Bucket Id / Name: ({item['id']} / {item['name']}")
+      if item["id"] == bucket_name:
+        print("Bucket already exists")
+        return
+
+  # Create the bucket
+  # https://cloud.google.com/storage/docs/json_api/v1/buckets/insert
+  print("Creating Google Cloud Bucket")
+
+  url = f"https://storage.googleapis.com/storage/v1/b?project={project_id}&predefinedAcl=private&predefinedDefaultObjectAcl=private&projection=noAcl"
+
+  request_body = {
+      "name": bucket_name,
+      "location": location
+  }
+
+  json_result = restAPIHelper(url, "POST", request_body)
+  print()
+  print(f"json_result: {json_result}")
+  print()
+  print("BigLake Bucket created: ", bucket_name)
+##### Set IAM permissions on bucket
+def setBucketIamPolicy(bucket_name, accountWithPrefix, role):
+  """Sets the bucket IAM policy."""
+
+  # Get the current bindings (if the account has access then skip)
+  # https://cloud.google.com/storage/docs/json_api/v1/buckets/getIamPolicy
+
+  url = f"https://storage.googleapis.com/storage/v1/b/{bucket_name}/iam"
+  json_result = restAPIHelper(url, "GET", None)
+  print(f"setBucketIamPolicy (GET) json_result: {json_result}")
+
+  # Test to see if permissions exist
+  if "bindings" in json_result:
+    for item in json_result["bindings"]:
+      members = item["members"]
+      for member in members:
+        if member == accountWithPrefix:
+          print("Permissions exist")
+          return
+
+  # Take the existing bindings and we need to append the new permission
+  # Otherwise we loose the existing permissions
+
+  bindings = json_result["bindings"]
+  new_permission = {
+      "role": role,
+      "members": [ accountWithPrefix ]
+      }
+
+  bindings.append(new_permission)
+
+  # https://cloud.google.com/storage/docs/json_api/v1/buckets/setIamPolicy
+  url = f"https://storage.googleapis.com/storage/v1/b/{bucket_name}/iam"
+
+  request_body = { "bindings" : bindings }
+
+  print(f"Permission bindings: {bindings}")
+
+
+  json_result = restAPIHelper(url, "PUT", request_body)
+  print()
+  print(f"json_result: {json_result}")
+  print()
+  print(f"Bucket IAM Permissions set for {accountWithPrefix} {role}")
+##### Set Project Level IAM Permissions
+def setProjectLevelIamPolicy(project_id, accountWithPrefix, role):
+  """Sets the Project Level IAM policy."""
+
+  # Get the current bindings (if the account has access then skip)
+  # https://cloud.google.com/resource-manager/reference/rest/v1/projects/getIamPolicy
+  url = f"https://cloudresourcemanager.googleapis.com/v1/projects/{project_id}:getIamPolicy"
+
+  request_body = { }
+  json_result = restAPIHelper(url, "POST", request_body)
+  print(f"setProjectLevelIamPolicy (GET) json_result: {json_result}")
+
+  # Test to see if permissions exist
+  if "bindings" in json_result:
+    for item in json_result["bindings"]:
+      if item["role"] == role:
+        members = item["members"]
+        for member in members:
+          if member == accountWithPrefix:
+            print("Permissions exist")
+            return
+
+  # Take the existing bindings and we need to append the new permission
+  # Otherwise we loose the existing permissions
+  if "bindings" in json_result:
+    bindings = json_result["bindings"]
+  else:
+    bindings = []
+
+  new_permission = {
+      "role": role,
+      "members": [ accountWithPrefix ]
+      }
+
+  bindings.append(new_permission)
+
+  # https://cloud.google.com/resource-manager/reference/rest/v1/projects/setIamPolicy
+  url = f"https://cloudresourcemanager.googleapis.com/v1/projects/{project_id}:setIamPolicy"
+
+  request_body = { "policy" : {
+      "bindings" : bindings
+      }
+  }
+
+  print(f"Permission bindings: {bindings}")
+
+  json_result = restAPIHelper(url, "POST", request_body)
+  print()
+  print(f"json_result: {json_result}")
+  print()
+  print(f"Project Level IAM Permissions set for {accountWithPrefix} {role}")
+##### Delete Document Processor
+def deleteDocumentProcessor(project_id, location, processor_name, processor_type):
+  """Creates a Vertex AI document process if it does not exist."""
+
+  # First find the item
+  # https://cloud.google.com/document-ai/docs/reference/rest/v1/projects.locations.processors/list
+  url = f"https://{location}-documentai.googleapis.com/v1/projects/{project_id}/locations/{location}/processors"
+
+  # Gather existing items
+  json_result = restAPIHelper(url, "GET", None)
+  print(f"createDocumentProcessor (GET) json_result: {json_result}")
+
+  # Test to see if processor exists, if so return
+  if "processors" in json_result:
+    for item in json_result["processors"]:
+      print(f"Process Name: {item['name']}")
+      # "projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c"
+      # NOTE: We do not know the random number at the end so test the type and display name
+      if item["type"] == processor_type and item["displayName"] == processor_name:
+        print("Found processor")
+        name = item["name"]
+        url = f"https://{location}-documentai.googleapis.com/v1/{name}"
+        json_result = restAPIHelper(url, "DELETE", None)
+        print("Document Processor Deleted")
+
+  print("Document Processor not found to delete")
+##### Create a Document Processor (to process the PDFs)
+def createDocumentProcessor(project_id, location, processor_name, processor_type):
+  """Creates a Vertex AI document process if it does not exist."""
+
+  # First find the item
+  # https://cloud.google.com/document-ai/docs/reference/rest/v1/projects.locations.processors/list
+  url = f"https://{location}-documentai.googleapis.com/v1/projects/{project_id}/locations/{location}/processors"
+
+  # Gather existing items
+  json_result = restAPIHelper(url, "GET", None)
+  print(f"createDocumentProcessor (GET) json_result: {json_result}")
+
+  # Test to see if processor exists, if so return
+  if "processors" in json_result:
+    for item in json_result["processors"]:
+      print(f"Process Name: {item['name']}")
+      # "projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c"
+      # NOTE: We do not know the random number at the end so test the type and display name
+      if item["type"] == processor_type and item["displayName"] == processor_name:
+        print("Processor already exists")
+        defaultProcessorVersion = item["defaultProcessorVersion"]
+        return defaultProcessorVersion
+
+  # Create the processor
+  # https://cloud.google.com/document-ai/docs/reference/rest/v1/projects.locations.processors/create
+  print("Creating Document Processor")
+
+  url = f"https://{location}-documentai.googleapis.com/v1/projects/{project_id}/locations/{location}/processors"
+
+  request_body = {
+    "type": processor_type,
+    "displayName": processor_name
+  }
+
+  """
+  INVALID_ARGUMENT: Document pages in non-imageless mode exceed the limit: 15 got 58.
+  Try using imageless mode to increase the limit to 100. [type.googleapis.com/util.MessageSetPayload='[google.rpc.error_details_ext]
+  { message: "Document pages in non-imageless mode exceed the limit: 15 got 58. Try using imageless mode to increase the limit to 100."
+  details { [type.googleapis.com/google.rpc.ErrorInfo] { reason: "PAGE_LIMIT_EXCEEDED" domain: "documentai.googleapis.com" metadata
+  { key: "page_limit" value: "100" } metadata { key: "pages" value: "58" } } } }']
+  """
+
+  json_result = restAPIHelper(url, "POST", request_body)
+
+  """
+  {'name': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c',
+  'type': 'OCR_PROCESSOR',
+  'displayName': 'vertex_ai_ocr_processor',
+  'state': 'ENABLED',
+  'processEndpoint': 'https://us-documentai.googleapis.com/v1/projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c:process',
+  'createTime': '2025-01-21T19:25:07.980401Z',
+  'defaultProcessorVersion': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/pretrained-ocr-v2.0-2023-06-02',
+  'processorVersionAliases': [{'alias': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/pretrained',
+    'processorVersion': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/pretrained-ocr-v1.0-2020-09-23'},
+    {'alias': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/pretrained-next',
+    'processorVersion': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/pretrained-ocr-v1.1-2022-09-12'},
+    {'alias': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/rc',
+    'processorVersion': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/pretrained-ocr-v2.1-2024-08-07'},
+    {'alias': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/stable',
+    'processorVersion': 'projects/530963301545/locations/us/processors/b7e8a9fe78cf7e9c/processorVersions/pretrained-ocr-v2.0-2023-06-02'}]}
+  """
+
+  defaultProcessorVersion = json_result["defaultProcessorVersion"]
+  print("Document Processor created: ", defaultProcessorVersion)
+  return defaultProcessorVersion
+
+### <font color='#4285f4'>MAIN CODE - Setup the Environment</font>
+- Create the bucket and copy data from a shared location
+- Create our BigLake / Vertex AI connection in BigQuery
+- The connection creates a service principal, so we will grant this principal access to our bucket and permission to call Vertex AI endpoints
+- Create our tables and load with data
+- Create our models in BigQuery
+##### Create our bucket and copy data for BigQuery / Storage
+# Create a bucket (it is named after the project id)
+bucket_name = project_id
+createGoogleCloudStorageBucket(project_id, bucket_name, location)
+# Copy our data (CSV files) from the shared demo bucket into our own bucket in our own location.
+source_path = "gs://data-analytics-golden-demo/cymbal-consumer-finance/*"
+dest_path = f"gs://{bucket_name}/cymbal-consumer-finance/"
+print(f"Copying data from {source_path} to {dest_path}")
+print("This may take a few minutes...")
+# gsutil flags: -m parallel copy, -q quiet output, -r recursive
+!gsutil -m -q cp -r {source_path} {dest_path}
+print("Copy [data] is complete")
+
+
+# Copy our data (PDF files) from the shared demo bucket into our own bucket in our own location.
+source_path = "gs://data-analytics-golden-demo/cymbal-consumer-finance-pdfs/*"
+dest_path = f"gs://{bucket_name}/pdfs/"
+print(f"Copying data from {source_path} to {dest_path}")
+print("This may take a few minutes...")
+!gsutil -m -q cp -r {source_path} {dest_path}
+print("Copy [pdfs] is complete")
+
+print(f"To view the files: https://console.cloud.google.com/storage/browser/{bucket_name}")
+##### Create the external BigQuery connection for BigLake / Vertex
+# Create our connection for BigLake / Vertex AI (an existing connection with this name is reused).
+# The returned service account id is what receives the IAM grants.
+
+connection_name = "biglake_vertexai_connection"
+biglake_vertexai_connection_serviceAccountId = createExternal_BigLake_VertexAI_RemoteFunctions_Connection(project_id, location, connection_name)
+print(f"biglake_vertexai_connection_serviceAccountId: {biglake_vertexai_connection_serviceAccountId}")
+##### Grant the service account created by the external connection IAM permissions
+# Grant the Biglake / Vertex AI External connection Service Principal permissions to call Vertex Models / Endpoints
+
+################################################################################################################################################
+# NOTE: You might need to wait a minute or two before running this.  It can fail if you run this too quickly after creating the connection
+################################################################################################################################################
+
+# To call Docuemnt API
+# Exception: {'reason': 'invalidQuery', 'location': 'query', 'message': "Permission denied for document processor 'projects/530963301545/locations/us/
+#processors/processor_id'. Please ensure that (1) The processor 'processor_id' exists in project 530963301545, region 'us', and is active. (2)
+# The connection's service account bqcx-530963301545-z8r3@gcp-sa-bigquery-condel.iam.gserviceaccount.com has roles/documentai.viewer role
+#in the project 530963301545. More details: Permission 'documentai.processors.get' denied on resource '//documentai.googleapis.com/projects/
+# 530963301545/locations/us/processors/processor_id' (or it may not exist)."}
+setProjectLevelIamPolicy(project_id, f"serviceAccount:{biglake_vertexai_connection_serviceAccountId}", "roles/documentai.viewer")
+
+
+# To call Vision API
+setProjectLevelIamPolicy(project_id, f"serviceAccount:{biglake_vertexai_connection_serviceAccountId}", "roles/serviceusage.serviceUsageConsumer")
+setProjectLevelIamPolicy(project_id, f"serviceAccount:{biglake_vertexai_connection_serviceAccountId}", "roles/serviceusage.serviceUsageConsumer")
+
+# To call GENERATE TEXT
+setProjectLevelIamPolicy(project_id, f"serviceAccount:{biglake_vertexai_connection_serviceAccountId}","roles/aiplatform.user")
+
+# Grant the current user and the service principal for our BigLake / Vertex AI connection access to the bucket
+# We want our BigLake / Vertex AI connections to have access to the files in our bucket
+
+setBucketIamPolicy(bucket_name, f"serviceAccount:{biglake_vertexai_connection_serviceAccountId}", "roles/storage.objectAdmin")
+setBucketIamPolicy(bucket_name, f"user:{user}", "roles/storage.admin")
+##### Create our BigQuery Dataset and populate it with data
+# Name of the BigQuery dataset that holds every table and model of this demo
+dataset_name = "cymbal_consumer_finance"
+
+# Create the dataset in the configured location if it is not there yet
+sql = f"""
+CREATE SCHEMA IF NOT EXISTS {dataset_name} OPTIONS(location = '{location}');
+"""
+RunQuery(sql)
+
+# Load each CSV that was copied into our bucket; OVERWRITE makes the cell safe to re-run
+sql = f"""LOAD DATA OVERWRITE `{dataset_name}.customers`
+(
+  customer_id STRING,
+  first_name STRING,
+  last_name STRING,
+  date_of_birth DATE,
+  email STRING,
+  phone_number STRING,
+  creation_date DATE,
+  life_event STRING
+)
+FROM FILES (format = 'CSV', skip_leading_rows = 1, uris = ['gs://{bucket_name}/cymbal-consumer-finance/ccf_csv_tables_customers.csv']);
+"""
+RunQuery(sql)
+
+# Loan applications
+sql = f"""
+LOAD DATA OVERWRITE `{dataset_name}.loan_applications`
+(
+  application_id STRING,
+  customer_id STRING,
+  application_date DATE,
+  product_type STRING,
+  sub_product STRING,
+  loan_amount FLOAT64,
+  description STRING,
+  application_status STRING,
+  approval_date DATE,
+  disbursement_date DATE,
+  application_channel STRING,
+  marketing_cost FLOAT64,
+)
+FROM FILES (format = 'CSV', skip_leading_rows = 1, uris = ['gs://{bucket_name}/cymbal-consumer-finance/ccf_csv_tables_loan_applications.csv']);
+"""
+RunQuery(sql)
+
+# Loan repayments
+sql = f"""
+LOAD DATA OVERWRITE `{dataset_name}.loan_repayments`
+(
+  repayment_id STRING,
+  loan_id STRING,
+  repayment_date DATE,
+  amount_due FLOAT64,
+  amount_paid FLOAT64,
+  payment_status STRING,
+  days_past_due INT64
+)
+FROM FILES (format = 'CSV', skip_leading_rows = 1, uris = ['gs://{bucket_name}/cymbal-consumer-finance/ccf_csv_tables_loan_repayments.csv']);
+"""
+RunQuery(sql)
+
+# Marketing costs
+sql = f"""
+LOAD DATA OVERWRITE `{dataset_name}.marketing_costs`
+(
+  cost_id STRING,
+  channel STRING,
+  product_type STRING,
+  cost_per_lead FLOAT64,
+)
+FROM FILES (format = 'CSV', skip_leading_rows = 1, uris = ['gs://{bucket_name}/cymbal-consumer-finance/ccf_csv_tables_marketing_costs.csv']);
+"""
+RunQuery(sql)
+
+print(f"You should now see a new dataset in BigQuery with several tables loaded with data.")
+##### Create our OCR and Layout Processors (to parse our PDF)
+# OCR processor (one text extract per PDF)
+processor_name = "vertex_ai_ocr_processor"
+processor_type = "OCR_PROCESSOR"
+
+# In case you need to change options (you should delete and then recreate)
+# deleteDocumentProcessor(project_id, location, processor_name, processor_type)
+
+# createDocumentProcessor returns the processor's default version resource name
+vertex_processor_name = createDocumentProcessor(project_id, location, processor_name, processor_type)
+print(f"vertex_processor_name: {vertex_processor_name}")
+
+# Layout Parser (for chunks)
+layout_processor_name = "vertex_ai_layout_processor"
+layout_processor_type = "LAYOUT_PARSER_PROCESSOR"
+
+# NOTE: layout_processor_name is rebound here from the display name to the returned
+# processor version resource name, which is what the CREATE MODEL statement uses.
+layout_processor_name = createDocumentProcessor(project_id, location, layout_processor_name, layout_processor_type)
+print(f"layout_processor_name: {layout_processor_name}")
+##### Create our BQML Models in our BigQuery Dataset
+# Create a remote model for each Vertex AI service. (Document AI, Vision, Gemini Pro, Embeddings)
+# Every model uses the external connection created earlier
+
+#####################################################################################################################
+# NOTE: You might get an error that the processor does not exist (you should re-run this cell)
+#####################################################################################################################
+
+# Document AI model backed by the layout parser processor
+sql = f"""CREATE MODEL IF NOT EXISTS `{project_id}.{dataset_name}.layout-connection`
+REMOTE WITH CONNECTION `{project_id}.{location}.{connection_name}`
+OPTIONS (
+  remote_service_type = 'cloud_ai_document_v1',
+  document_processor='{layout_processor_name}');
+"""
+
+RunQuery(sql)
+
+# Document AI model backed by the OCR processor
+sql = f"""CREATE MODEL IF NOT EXISTS `{project_id}.{dataset_name}.document-connection`
+REMOTE WITH CONNECTION `{project_id}.{location}.{connection_name}`
+OPTIONS (
+  remote_service_type = 'cloud_ai_document_v1',
+  document_processor='{vertex_processor_name}');
+"""
+
+RunQuery(sql)
+
+# Vision model
+sql = f"""CREATE MODEL IF NOT EXISTS `{project_id}.{dataset_name}.vision-connection`
+REMOTE WITH CONNECTION `{project_id}.{location}.{connection_name}`
+OPTIONS (remote_service_type = 'cloud_ai_vision_v1');"""
+
+RunQuery(sql)
+
+print(f"Created cloud_ai_vision_v1: {sql}")
+
+# Gemini model
+sql = f"""CREATE MODEL IF NOT EXISTS `{project_id}.{dataset_name}.gemini-15-pro`
+REMOTE WITH CONNECTION `{project_id}.{location}.{connection_name}`
+OPTIONS (endpoint = 'gemini-1.5-pro');"""
+
+RunQuery(sql)
+
+print(f"Created gemini-1.5-pro: {sql}")
+
+# Alternative Gemini endpoint, left disabled
+#sql = f"""CREATE MODEL IF NOT EXISTS `{project_id}.{dataset_name}.gemini-2-0-flash`
+#REMOTE WITH CONNECTION `{project_id}.{location}.{connection_name}`
+#OPTIONS (endpoint = 'gemini-2.0-flash');"""
+
+#RunQuery(sql)
+
+#print(f"Created gemini-2.0-flash: {sql}")
+
+
+# Text embedding model
+sql = f"""CREATE MODEL IF NOT EXISTS `{project_id}.{dataset_name}.vertexai-textembedding`
+REMOTE WITH CONNECTION `{project_id}.{location}.{connection_name}`
+OPTIONS (endpoint = 'text-embedding-005');"""
+
+RunQuery(sql)
+
+print(f"Created text-embedding-005: {sql}")
+### <font color='#4285f4'>MAIN CODE - Demo</font>
+1. Create an Object table over a set of PDFs
+2. Force a refresh of the object table
+3. Process the PDFs with the OCR processor which creates 1 large text extract
+4. Process the PDFs with the Layout processor which will chunk our PDF text
+5. Create embeddings on the chunked PDF text
+6. Search the embeddings
+7. Use Gemini and the RAG pattern to use our embedding search results to answer a question
+##### Create an Object table over a set of PDFs
+# Create the object table over the PDF files in our bucket
+
+object_table_name = "object_table_pdfs"
+
+# The metadata cache is MANUAL, so the table has to be refreshed explicitly before it lists the files
+sql = f"""
+CREATE OR REPLACE EXTERNAL TABLE `{project_id}.{dataset_name}.{object_table_name}`
+WITH CONNECTION `{project_id}.{location}.{connection_name}`
+OPTIONS (
+    object_metadata="DIRECTORY",
+    uris = ['gs://{bucket_name}/pdfs/*.pdf'],
+    max_staleness=INTERVAL 30 MINUTE,
+    metadata_cache_mode="MANUAL"
+    );
+"""
+
+RunQuery(sql)
+##### Call the Refresh on the Object table so it picks up the files in storage
+# Since the table is set to MANUAL refresh, refresh the table so we see the files
+
+sql = f"CALL BQ.REFRESH_EXTERNAL_METADATA_CACHE('{project_id}.{dataset_name}.{object_table_name}')"
+RunQuery(sql)
+
+# Show the data (up to 20 rows of the object table)
+sql=f"""SELECT *
+          FROM `{project_id}.{dataset_name}.{object_table_name}`
+          LIMIT 20;"""
+
+result=RunQuery(sql)
+
+# Last expression of the cell, so the notebook renders the DataFrame
+result
+# Get a signed url so we can show in this notebook
+
+sql=f"""SELECT *
+  FROM EXTERNAL_OBJECT_TRANSFORM(TABLE `{project_id}.{dataset_name}.{object_table_name}`,['SIGNED_URL'])
+  WHERE uri LIKE '%loan_application_0d2e87d5-6337-4fc6-b6ed-5e6f35df596b.pdf%'"""
+
+df=RunQuery(sql)
+
+for row in df.itertuples():
+  uri = row.uri
+  signed_url = row.signed_url
+
+print(f"uri: {uri}")
+print(f"signed_url: {signed_url}")
+
+print()
+print()
+print("Notice that we have handwriting and different types of fonts.")
+print()
+print()
+
+# Chrome shows a warning
+#iframe = IPython.display.IFrame(src=signed_url, width=800, height=600)
+#display(iframe)
+
+html = f"""
+<object data="{signed_url}" type="application/pdf" width="800" height="600">
+  <p>Your browser does not support inline PDFs.</p>
+  alt : <a href="{signed_url}" target="_blank">Sample Loan PDF</a>
+</object>
+"""
+IPython.display.HTML(html)
+
+##### We want to process each PDF document in our object table (OCR Processor)
+# The OCR processor returns each PDF as one big block of text
+
+pdfs_document_ocr_processor_table_name = "pdfs_document_ocr_processor"
+
+# Run every file of the object table through the OCR-backed model and store the result
+sql = f"""
+CREATE OR REPLACE TABLE `{project_id}.{dataset_name}.{pdfs_document_ocr_processor_table_name}` AS (
+  SELECT *
+    FROM ML.PROCESS_DOCUMENT(MODEL `{project_id}.{dataset_name}.document-connection`,
+                             TABLE `{project_id}.{dataset_name}.{object_table_name}`)
+);"""
+
+RunQuery(sql)
+
+# Show the data
+sql=f"""SELECT *
+          FROM `{project_id}.{dataset_name}.{pdfs_document_ocr_processor_table_name}`
+        LIMIT 10;"""
+
+result=RunQuery(sql)
+
+# Last expression of the cell, so the notebook renders the DataFrame
+result
+##### We want to process each PDF document in our object table (Layout Processor)
+# The layout processor returns the PDF text as chunks
+# We would need to "manually" (or semantic) chunk this to create embeddings
+
+pdfs_document_layout_processor_table_name = "pdfs_document_layout_processor"
+# Chunking options, passed to the processor as a JSON literal
+process_options = '{"layout_config": {"chunking_config": {"chunk_size": 100}}}'
+
+sql = f"""
+CREATE OR REPLACE TABLE `{project_id}.{dataset_name}.{pdfs_document_layout_processor_table_name}` AS (
+  SELECT *
+    FROM ML.PROCESS_DOCUMENT(MODEL `{project_id}.{dataset_name}.layout-connection`,
+                             TABLE `{project_id}.{dataset_name}.{object_table_name}`,
+                             PROCESS_OPTIONS => (JSON '{process_options}')
+  )
+);"""
+
+RunQuery(sql)
+
+# Show the data
+sql=f"""SELECT uri, ml_process_document_result
+          FROM `{project_id}.{dataset_name}.{pdfs_document_layout_processor_table_name}`
+        LIMIT 5;"""
+
+result=RunQuery(sql)
+
+# Last expression of the cell, so the notebook renders the DataFrame
+result
+##### Parse the JSON from the PDF extraction. Use BQ JSON functions.
+# Table that receives one row per chunk
+pdfs_document_layout_processor_chunks_table_name = "pdfs_document_layout_processor_chunks"
+
+# Unnest the chunks array of the layout result and pull the scalar fields out of each chunk
+sql = f"""CREATE OR REPLACE TABLE `{project_id}.{dataset_name}.{pdfs_document_layout_processor_chunks_table_name}` AS
+SELECT uri,
+       JSON_EXTRACT_SCALAR(json , '$.chunkId') AS chunk_id,
+       JSON_EXTRACT_SCALAR(json , '$.content') AS content,
+       JSON_EXTRACT_SCALAR(json , '$.pageFooters[0].text') AS page_footers_text,
+       JSON_EXTRACT_SCALAR(json , '$.pageSpan.pageStart') AS page_span_start,
+       JSON_EXTRACT_SCALAR(json , '$.pageSpan.pageEnd') AS page_span_end
+  FROM `{project_id}.{dataset_name}.{pdfs_document_layout_processor_table_name}`,
+       UNNEST(JSON_EXTRACT_ARRAY(ml_process_document_result.chunkedDocument.chunks, '$')) json
+"""
+
+RunQuery(sql)
+
+# Show the data
+sql=f"""SELECT *
+          FROM `{project_id}.{dataset_name}.{pdfs_document_layout_processor_chunks_table_name}`
+        ORDER BY uri, chunk_id
+        LIMIT 10"""
+
+result=RunQuery(sql)
+
+# Last expression of the cell, so the notebook renders the DataFrame
+result
+##### Use ML.GENERATE_EMBEDDING to create embeddings for our extracted PDF text
+# In a real-life scenario, you would not pass in TABLE `{project_id}.{dataset_name}.{pdfs_document_layout_processor_chunks_table_name}`
+# You would pass in a query for items that do not already EXIST in the embedding table.  The TABLE parameter could also be a SQL statement.
+
+# The "content" field will automatically be passed to the model as the column to embed
+
+
+pdfs_document_layout_processor_chunks_table_name = "pdfs_document_layout_processor_chunks"
+pdfs_document_embeddings_table_name = "pdfs_document_embeddings"
+
+sql = f"""
+CREATE OR REPLACE TABLE `{project_id}.{dataset_name}.{pdfs_document_embeddings_table_name}` AS
+SELECT uri,
+       chunk_id,
+       content,
+       ml_generate_embedding_result as vector_embedding,
+       ml_generate_embedding_statistics,
+       ml_generate_embedding_status,
+  FROM ML.GENERATE_EMBEDDING(MODEL `{project_id}.{dataset_name}.vertexai-textembedding`,
+                             TABLE `{project_id}.{dataset_name}.{pdfs_document_layout_processor_chunks_table_name}`,
+                             STRUCT(
+                               TRUE AS flatten_json_output,
+                               'SEMANTIC_SIMILARITY' as task_type,
+                               768 AS output_dimensionality
+                             ))
+"""
+
+RunQuery(sql)
+##### Show our embedded data (first two elements of the embedding array)
+# Only the first two vector values are selected (presumably to keep the preview small)
+
+sql=f"""SELECT uri, chunk_id, content, [vector_embedding[0],vector_embedding[1]] as vector_embedding, ml_generate_embedding_statistics, ml_generate_embedding_status
+          FROM `{project_id}.{dataset_name}.{pdfs_document_embeddings_table_name}`
+      ORDER BY uri, chunk_id
+         LIMIT 10;"""
+
+result=RunQuery(sql)
+
+result
+##### Search our embeddings table for a search string
+# The search results will contain the top_k (10) chunks that are semantically closest to our search string
+# The shorter the distance the more pertinent the result
+
+vector_search_string = 'always wanted to own a home'
+
+options = '{"fraction_lists_to_search": 0.01}'
+
+sql = f"""SELECT base.uri as uri,
+       base.chunk_id as chunk_id,
+       base.content as content,
+       distance
+  FROM VECTOR_SEARCH(TABLE `{project_id}.{dataset_name}.{pdfs_document_embeddings_table_name}`,
+                    'vector_embedding', -- column in table to search
+                    (SELECT ml_generate_embedding_result,
+                            content AS query
+                       FROM ML.GENERATE_EMBEDDING(MODEL `{project_id}.{dataset_name}.vertexai-textembedding`,
+                                                 (SELECT '{vector_search_string}' AS content),
+                                                  STRUCT(TRUE AS flatten_json_output,
+                                                        'SEMANTIC_SIMILARITY' as task_type,
+                                                        768 AS output_dimensionality) -- struct
+                     )  -- question embedding
+                     ), -- vector search
+        top_k => 10,
+        OPTIONS => '{options}')
+ORDER BY distance;
+"""
+
+result = RunQuery(sql)
+
+result
+**Learning Item**
+- Change the search string for some different items
+##### Search our PDFs (embeddings) and return the results as JSON
+# Now let's return the results as JSON strings that we will inject into the context of Gemini
+# LLMs understand JSON, so each piece of text is returned together with its source (uri and chunk_id)
+
+vector_search_string = 'always wanted to own a home'
+
+options = '{"fraction_lists_to_search": 0.01}'
+
+sql = f"""SELECT TO_JSON_STRING(STRUCT(base.uri as uri,
+       base.chunk_id as chunk_id,
+       base.content as content,
+       distance as vector_search_distance)) as rag_json
+  FROM VECTOR_SEARCH(TABLE `{project_id}.{dataset_name}.{pdfs_document_embeddings_table_name}`,
+                    'vector_embedding', -- column in table to search
+                    (SELECT ml_generate_embedding_result,
+                            content AS query
+                       FROM ML.GENERATE_EMBEDDING(MODEL `{project_id}.{dataset_name}.vertexai-textembedding`,
+                                                 (SELECT '{vector_search_string}' AS content),
+                                                  STRUCT(TRUE AS flatten_json_output,
+                                                        'SEMANTIC_SIMILARITY' as task_type,
+                                                        768 AS output_dimensionality) -- struct
+                     )  -- question embedding
+                     ), -- vector search
+        top_k => 10,
+        OPTIONS => '{options}')
+ORDER BY distance;
+"""
+
+result = RunQuery(sql)
+
+result
+##### Call Gemini directly in BigQuery using ML.GENERATE_TEXT
+llm_prompt = 'What type of LLM are you?'
+
+# Send the single prompt above to the model; the STRUCT holds the generation options for the call
+sql = f"""SELECT *
+  FROM ML.GENERATE_TEXT(MODEL`{project_id}.{dataset_name}.gemini-15-pro`,
+                       (SELECT '{llm_prompt}' AS prompt),
+                        STRUCT(
+                          0.8 AS temperature,
+                          1024 AS max_output_tokens,
+                          0.95 AS top_p,
+                          40 AS top_k)
+                          )
+"""
+
+result = RunQuery(sql)
+
+result
+**Learning Item**
+- Parse the returned JSON from Gemini
+- Tip: Create a UDF to parse the JSON, that way if Gemini changes the response JSON you only have 1 place to update your code.
+##### Use Gemini (BQML) to process the data we retrieve from our Embedding Search to create a summary
+# RAG Pattern
+
+vector_search_string = 'own a home'
+
+llm_prompt = """Which people have wanted a house during the loan process?
+Site the sources by explaining your results using the uri and the chunk_id.
+Quote the orginal text from the content.
+<context>
+REPLACE-ME-WITH-EMBEDDING-SEARCH-RESULTS
+</context>
+"""
+
+options = '{"fraction_lists_to_search": 0.01}'
+
+sql = f"""WITH embeddings_data AS
+(
+SELECT TO_JSON_STRING(STRUCT(base.uri as uri,
+       base.chunk_id as chunk_id,
+       base.content as content,
+       distance  as vector_search_distance)) as embeddings_json
+  FROM VECTOR_SEARCH(TABLE `{project_id}.{dataset_name}.{pdfs_document_embeddings_table_name}`,
+                    'vector_embedding', -- column in table to search
+                    (SELECT ml_generate_embedding_result,
+                            content AS query
+                       FROM ML.GENERATE_EMBEDDING(MODEL `{project_id}.{dataset_name}.vertexai-textembedding`,
+                                                 (SELECT '{vector_search_string}' AS content),
+                                                  STRUCT(TRUE AS flatten_json_output,
+                                                        'SEMANTIC_SIMILARITY' as task_type,
+                                                        768 AS output_dimensionality) -- struct
+                     )  -- question embedding
+                     ), -- vector search
+        top_k => 10,
+        OPTIONS => '{options}')
+ORDER BY distance
+)
+, embeddings_array AS
+(
+SELECT ARRAY_AGG(embeddings_json) AS embeddings_json_array
+  FROM embeddings_data
+)
+SELECT ml_generate_text_result.candidates[0].content.parts[0].text as llm_result
+  FROM ML.GENERATE_TEXT(MODEL`{project_id}.{dataset_name}.gemini-15-pro`,
+                       (SELECT REPLACE(\"\"\"{llm_prompt}\"\"\",
+                                       'REPLACE-ME-WITH-EMBEDDING-SEARCH-RESULTS',
+                                       ARRAY_TO_STRING(embeddings_json_array, '\\n')) AS prompt
+                          FROM embeddings_array),
+            STRUCT(
+              0.8 AS temperature,
+              2048 AS max_output_tokens,
+              0.95 AS top_p,
+              40 AS top_k)
+              )
+"""
+
+#print(sql)
+
+result = RunQuery(sql)
+
+result
+**Learning Item**
+- Change the search string and prompt
+- Change the temperature and other parameters
+#### Now let's ground our result using Google Search
+# Without grounding: ground_with_google_search is not set, so this is the baseline call
+
+llm_prompt = 'What is the weather today on Los Angeles California and what is the current date?'
+
+sql = f"""SELECT *
+  FROM ML.GENERATE_TEXT(MODEL`{project_id}.{dataset_name}.gemini-15-pro`,
+                       (SELECT '{llm_prompt}' AS prompt),
+                        STRUCT(
+                          0.8 AS temperature,
+                          1024 AS max_output_tokens,
+                          0.95 AS top_p,
+                          40 AS top_k)
+                          )
+"""
+
+result = RunQuery(sql)
+result
+**Learning Item**
+- Change the prompt so you get something false back.
+# Grounded: the call below sets ground_with_google_search to TRUE
+
+llm_prompt = 'What is the weather today on Los Angeles California and what is the current date?'
+
+sql = f"""SELECT *
+  FROM ML.GENERATE_TEXT(MODEL`{project_id}.{dataset_name}.gemini-15-pro`,
+                       (SELECT '{llm_prompt}' AS prompt),
+                        STRUCT(
+                          0.8 AS temperature,
+                          1024 AS max_output_tokens,
+                          0.95 AS top_p,
+                          40 AS top_k,
+                          TRUE AS ground_with_google_search)
+                          )
+"""
+
+result = RunQuery(sql)
+result
+**Learning Item**
+- Change the prompt so you get something that is only accurate when grounded
+#### Now let's return our response in our own JSON Schema
+##### Now let's output the result as formatted JSON
+# Grounded and return the results as formatted JSON
+
+# Previously you would tell the LLM in the prompt how to output JSON
+# Now we can pass in the schema in which we want our output
+# This means we can then place it into a table or pass it back to an application in a structured format.
+
+llm_prompt = 'What is the weather today on Los Angeles California and what is the current date?'
+
+response_schema = "city STRING, state STRING, weather_result STRUCT< weather STRING, temperature FLOAT64>"
+
+sql = f"""SELECT ml_generate_text_result.candidates[0].content.parts[0] as json_response_schema
+  FROM ML.GENERATE_TABLE(MODEL`{project_id}.{dataset_name}.gemini-15-pro`,
+                        (SELECT '{llm_prompt}' AS prompt),
+                         STRUCT(
+                          0.8 AS temperature,
+                          1024 AS max_output_tokens,
+                          0.95 AS top_p,
+                          40 AS top_k,
+                          TRUE AS ground_with_google_search,
+                          '{response_schema}' AS response_schema))
+"""
+
+result = RunQuery(sql)
+result
+**Just the Json Response Schema Result**
+```
+{
+  "city": "Los Angeles",
+  "state": "California",
+  "weather_result": {
+    "temperature": 57,
+    "weather": "Sunny"
+  }
+}
+```
+
+**Full Result**
+```
+{
+  "candidates": [
+    {
+      "avg_logprobs": -0.009560285076018303,
+      "content": {
+        "parts": [
+          {
+            "text": "{\"city\": \"Los Angeles\", \"state\": \"California\", \"weather_result\": {\"temperature\": 57, \"weather\": \"Sunny\"}}"
+          }
+        ],
+        "role": "model"
+      },
+      "finish_reason": "STOP",
+      "grounding_metadata": {
+        "retrieval_metadata": {},
+        "search_entry_point": {
+          "rendered_content": "<style>\n.container {\n align-items: center;\n border-radius: 8px;\n display: flex;\n font-family: Google Sans, Roboto, sans-serif;\n font-size: 14px;\n line-height: 20px;\n padding: 8px 12px;\n}\n.chip {\n display: inline-block;\n border: solid 1px;\n border-radius: 16px;\n min-width: 14px;\n padding: 5px 16px;\n text-align: center;\n user-select: none;\n margin: 0 8px;\n -webkit-tap-highlight-color: transparent;\n}\n.carousel {\n overflow: auto;\n scrollbar-width: none;\n white-space: nowrap;\n margin-right: -12px;\n}\n.headline {\n display: flex;\n margin-right: 4px;\n}\n.gradient-container {\n position: relative;\n}\n.gradient {\n position: absolute;\n transform: translate(3px, -9px);\n height: 36px;\n width: 9px;\n}\n@media (prefers-color-scheme: light) {\n .container {\n background-color: #fafafa;\n box-shadow: 0 0 0 1px #0000000f;\n }\n .headline-label {\n color: #1f1f1f;\n }\n .chip {\n background-color: #ffffff;\n border-color: #d2d2d2;\n color: #5e5e5e;\n text-decoration: none;\n }\n .chip:hover {\n background-color: #f2f2f2;\n }\n .chip:focus {\n background-color: #f2f2f2;\n }\n .chip:active {\n background-color: #d8d8d8;\n border-color: #b6b6b6;\n }\n .logo-dark {\n display: none;\n }\n .gradient {\n background: linear-gradient(90deg, #fafafa 15%, #fafafa00 100%);\n }\n}\n@media (prefers-color-scheme: dark) {\n .container {\n background-color: #1f1f1f;\n box-shadow: 0 0 0 1px #ffffff26;\n }\n .headline-label {\n color: #fff;\n }\n .chip {\n background-color: #2c2c2c;\n border-color: #3c4043;\n color: #fff;\n text-decoration: none;\n }\n .chip:hover {\n background-color: #353536;\n }\n .chip:focus {\n background-color: #353536;\n }\n .chip:active {\n background-color: #464849;\n border-color: #53575b;\n }\n .logo-light {\n display: none;\n }\n .gradient {\n background: linear-gradient(90deg, #1f1f1f 15%, #1f1f1f00 100%);\n }\n}\n</style>\n<div class=\"container\">\n <div class=\"headline\">\n <svg class=\"logo-light\" 
width=\"18\" height=\"18\" viewBox=\"9 9 35 35\" fill=\"none\" xmlns=\"http://www.w3.org/2000/svg\">\n <path fill-rule=\"evenodd\" clip-rule=\"evenodd\" d=\"M42.8622 27.0064C42.8622 25.7839 42.7525 24.6084 42.5487 23.4799H26.3109V30.1568H35.5897C35.1821 32.3041 33.9596 34.1222 32.1258 35.3448V39.6864H37.7213C40.9814 36.677 42.8622 32.2571 42.8622 27.0064V27.0064Z\" fill=\"#4285F4\"/>\n <path fill-rule=\"evenodd\" clip-rule=\"evenodd\" d=\"M26.3109 43.8555C30.9659 43.8555 34.8687 42.3195 37.7213 39.6863L32.1258 35.3447C30.5898 36.3792 28.6306 37.0061 26.3109 37.0061C21.8282 37.0061 18.0195 33.9811 16.6559 29.906H10.9194V34.3573C13.7563 39.9841 19.5712 43.8555 26.3109 43.8555V43.8555Z\" fill=\"#34A853\"/>\n <path fill-rule=\"evenodd\" clip-rule=\"evenodd\" d=\"M16.6559 29.8904C16.3111 28.8559 16.1074 27.7588 16.1074 26.6146C16.1074 25.4704 16.3111 24.3733 16.6559 23.3388V18.8875H10.9194C9.74388 21.2072 9.06992 23.8247 9.06992 26.6146C9.06992 29.4045 9.74388 32.022 10.9194 34.3417L15.3864 30.8621L16.6559 29.8904V29.8904Z\" fill=\"#FBBC05\"/>\n <path fill-rule=\"evenodd\" clip-rule=\"evenodd\" d=\"M26.3109 16.2386C28.85 16.2386 31.107 17.1164 32.9095 18.8091L37.8466 13.8719C34.853 11.082 30.9659 9.3736 26.3109 9.3736C19.5712 9.3736 13.7563 13.245 10.9194 18.8875L16.6559 23.3388C18.0195 19.2636 21.8282 16.2386 26.3109 16.2386V16.2386Z\" fill=\"#EA4335\"/>\n </svg>\n <svg class=\"logo-dark\" width=\"18\" height=\"18\" viewBox=\"0 0 48 48\" xmlns=\"http://www.w3.org/2000/svg\">\n <circle cx=\"24\" cy=\"23\" fill=\"#FFF\" r=\"22\"/>\n <path d=\"M33.76 34.26c2.75-2.56 4.49-6.37 4.49-11.26 0-.89-.08-1.84-.29-3H24.01v5.99h8.03c-.4 2.02-1.5 3.56-3.07 4.56v.75l3.91 2.97h.88z\" fill=\"#4285F4\"/>\n <path d=\"M15.58 25.77A8.845 8.845 0 0 0 24 31.86c1.92 0 3.62-.46 4.97-1.31l4.79 3.71C31.14 36.7 27.65 38 24 38c-5.93 0-11.01-3.4-13.45-8.36l.17-1.01 4.06-2.85h.8z\" fill=\"#34A853\"/>\n <path d=\"M15.59 20.21a8.864 8.864 0 0 0 0 5.58l-5.03 3.86c-.98-2-1.53-4.25-1.53-6.64 
0-2.39.55-4.64 1.53-6.64l1-.22 3.81 2.98.22 1.08z\" fill=\"#FBBC05\"/>\n <path d=\"M24 14.14c2.11 0 4.02.75 5.52 1.98l4.36-4.36C31.22 9.43 27.81 8 24 8c-5.93 0-11.01 3.4-13.45 8.36l5.03 3.85A8.86 8.86 0 0 1 24 14.14z\" fill=\"#EA4335\"/>\n </svg>\n <div class=\"gradient-container\"><div class=\"gradient\"></div></div>\n </div>\n <div class=\"carousel\">\n <a class=\"chip\" href=\"https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUBnsYu2oyjoz8jE3d3KgG_DYg0vL8tBr-t3pOlIq8tRY4Vn4j54VlZSN9z4zsvdZ3Xs5ptbQ7XaEiuoD3Mq9ZDk6ed9CRFuG4cK9B5_wJBhq-2P6w6FLkg8ck5dcL0DT9Zx1yF2NFMq0PStZtPZSKd-jbovNpdrouQO1uWhmb9l6fzcHyGKOLqf5kh2wSj2JPzpk-Yqok8BQiZW_hvEZ8-siXqkoupNhoykzg==\">weather los angeles california today</a>\n </div>\n</div>\n"
+        },
+        "web_search_queries": [
+          "weather los angeles california today"
+        ]
+      },
+      "safety_ratings": [
+        {
+          "category": "HARM_CATEGORY_HATE_SPEECH",
+          "probability": "NEGLIGIBLE",
+          "probability_score": 0.0390625,
+          "severity": "HARM_SEVERITY_NEGLIGIBLE",
+          "severity_score": 0.08154297
+        },
+        {
+          "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+          "probability": "NEGLIGIBLE",
+          "probability_score": 0.15527344,
+          "severity": "HARM_SEVERITY_NEGLIGIBLE",
+          "severity_score": 0.08251953
+        },
+        {
+          "category": "HARM_CATEGORY_HARASSMENT",
+          "probability": "NEGLIGIBLE",
+          "probability_score": 0.064453125,
+          "severity": "HARM_SEVERITY_NEGLIGIBLE",
+          "severity_score": 0.044677734
+        },
+        {
+          "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+          "probability": "NEGLIGIBLE",
+          "probability_score": 0.06738281,
+          "severity": "HARM_SEVERITY_NEGLIGIBLE",
+          "severity_score": 0.10253906
+        }
+      ],
+      "score": -0.2963688373565674
+    }
+  ],
+  "create_time": "2025-01-24T17:19:06.374380Z",
+  "model_version": "gemini-1.5-pro-001",
+  "response_id": "isuTZ-zsFo2QhMIPrKrd6As",
+  "usage_metadata": {
+    "billable_prompt_usage": {
+      "text_count": 108
+    },
+    "candidates_token_count": 31,
+    "prompt_token_count": 23,
+    "total_token_count": 54
+  }
+}
+```
+**Learning Item**
+- Copy the SQL from the cell "# RAG Pattern" and have it output the data using ML.GENERATE_TABLE so we get the person's name, explanation, uri and chunk_id as separate fields.
+**Final Thoughts**
+- The PDFs used in this demo are basically a "form" and you could use the Forms parser to parse the items. You can update the above code to use it instead of the layout parser.
\ No newline at end of file
diff --git a/definitions/sources/badges.sqlx b/definitions/sources/badges.sqlx
index 41a9bab..e688f0b 100644
--- a/definitions/sources/badges.sqlx
+++ b/definitions/sources/badges.sqlx
@@ -1,6 +1,6 @@
 config {
   type: "declaration",
-  database: "bigquery-public-data",
-  schema: "stackoverflow",
+  database: "gauravbose-189-202207222040400",
+  schema: "stack_overflow",
   name: "badges"
 }
\ No newline at end of file
diff --git a/definitions/sources/posts_answers.sqlx b/definitions/sources/posts_answers.sqlx
index dcd9f38..d37b35d 100644
--- a/definitions/sources/posts_answers.sqlx
+++ b/definitions/sources/posts_answers.sqlx
@@ -1,6 +1,6 @@
 config {
   type: "declaration",
-  database: "bigquery-public-data",
-  schema: "stackoverflow",
+  database: "gauravbose-189-202207222040400",
+  schema: "stack_overflow",
   name: "posts_answers"
 }
\ No newline at end of file
diff --git a/definitions/sources/posts_questions.sqlx b/definitions/sources/posts_questions.sqlx
index f72312d..c954b54 100644
--- a/definitions/sources/posts_questions.sqlx
+++ b/definitions/sources/posts_questions.sqlx
@@ -1,6 +1,6 @@
 config {
   type: "declaration",
-  database: "bigquery-public-data",
-  schema: "stackoverflow",
+  database: "gauravbose-189-202207222040400",
+  schema: "stack_overflow",
   name: "posts_questions"
 }
\ No newline at end of file
diff --git a/definitions/sources/users.sqlx b/definitions/sources/users.sqlx
index ebe479a..826a96c 100644
--- a/definitions/sources/users.sqlx
+++ b/definitions/sources/users.sqlx
@@ -1,6 +1,6 @@
 config {
   type: "declaration",
-  database: "bigquery-public-data",
-  schema: "stackoverflow",
+  database: "gauravbose-189-202207222040400",
+  schema: "stack_overflow",
   name: "users"
 }
\ No newline at end of file
diff --git a/definitions/sources/votes.sqlx b/definitions/sources/votes.sqlx
new file mode 100644
index 0000000..6839965
--- /dev/null
+++ b/definitions/sources/votes.sqlx
@@ -0,0 +1,6 @@
+config {
+  type: "declaration",
+  database: "gauravbose-189-202207222040400",
+  schema: "stack_overflow",
+  name: "votes"
+}
\ No newline at end of file
diff --git a/definitions/staging/stg_users.sqlx b/definitions/staging/stg_users.sqlx
index 9b791a6..1263e0f 100644
--- a/definitions/staging/stg_users.sqlx
+++ b/definitions/staging/stg_users.sqlx
@@ -1,7 +1,7 @@
 config {
   type: "view",
   schema: "staging",
-  description: "Cleaned version of stackoverflow.users table"
+  description: "Cleaned version of stackoverflow.users table"
 }
 
 select
diff --git a/definitions/staging/stg_votes.sqlx b/definitions/staging/stg_votes.sqlx
new file mode 100644
index 0000000..0abe869
--- /dev/null
+++ b/definitions/staging/stg_votes.sqlx
@@ -0,0 +1,15 @@
+config {
+  type: "view",
+  schema: "staging",
+  description: "Cleaned version of stackoverflow.votes table"
+}
+-- NOTE(review): count() below is not DISTINCT despite the distinct_votes alias, and LIMIT has no ORDER BY, so which 100 groups are kept is unspecified - confirm intent
+select
+  post_id,
+  creation_date,
+  count(vote_type_id) as distinct_votes
+from
+  ${ref("votes")}
+  group by post_id, creation_date
+  having count(vote_type_id) > 1
+  limit 100
\ No newline at end of file
diff --git a/workflow_settings.yaml b/workflow_settings.yaml
index 414eda1..d1a32c1 100644
--- a/workflow_settings.yaml
+++ b/workflow_settings.yaml
@@ -1,5 +1,5 @@
 dataformCoreVersion: 3.0.0
 defaultLocation: US
-defaultProject: dataform-dogfood-shared
-defaultDataset: dataform_stackoverflow
+defaultProject: gauravbose-189-202207222040400
+defaultDataset: staging
 defaultAssertionDataset: dataform_stackoverflow_assertions