Skip to content

Commit ab0b88d

Browse files
authored
Merge pull request #21 from NakaokaRei/feat/locate-all-on-screen
feat: add locateAllOnScreen() function
2 parents 2ba882b + f23971c commit ab0b88d

File tree

4 files changed

+349
-0
lines changed

4 files changed

+349
-0
lines changed

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,27 @@ if let center = SwiftAutoGUI.locateCenterOnScreen("target.png", confidence: 0.8,
170170
SwiftAutoGUI.move(to: center)
171171
SwiftAutoGUI.leftClick()
172172
}
173+
174+
// Find all occurrences of an image on screen
175+
let buttons = SwiftAutoGUI.locateAllOnScreen("button.png")
176+
print("Found \(buttons.count) buttons")
177+
for (index, button) in buttons.enumerated() {
178+
print("Button \(index): \(button)")
179+
SwiftAutoGUI.move(to: CGPoint(x: button.midX, y: button.midY))
180+
SwiftAutoGUI.leftClick()
181+
Thread.sleep(forTimeInterval: 0.5)
182+
}
183+
184+
// locateAllOnScreen with confidence threshold for flexible matching
185+
let icons = SwiftAutoGUI.locateAllOnScreen("app_icon.png", confidence: 0.85)
186+
for icon in icons {
187+
// Process each found icon
188+
print("Found icon at: \(icon)")
189+
}
190+
191+
// Search for multiple matches in a specific region
192+
let topRegion = CGRect(x: 0, y: 0, width: 1920, height: 100)
193+
let menuItems = SwiftAutoGUI.locateAllOnScreen("menu_item.png", region: topRegion)
173194
```
174195

175196
# Contributors

Sample/Sample/ContentView.swift

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ struct ContentView: View {
144144
locateAndClickTestImage()
145145
}
146146

147+
Button("Find All Test Images") {
148+
findAllTestImages()
149+
}
150+
147151
if !imageRecognitionResult.isEmpty {
148152
Text(imageRecognitionResult)
149153
.font(.caption)
@@ -280,6 +284,52 @@ struct ContentView: View {
280284
imageRecognitionResult = "Test image not found on screen. Make sure the test image is visible in an app window."
281285
}
282286
}
287+
288+
/// Finds every on-screen instance of the test image, traces the outline of each
/// match with the mouse cursor, and writes a summary to `imageRecognitionResult`.
///
/// The search uses `SwiftAutoGUI.locateAllOnScreen` with a confidence of 0.8.
/// After tracing, the cursor is parked at the center of the first match.
///
/// NOTE(review): this runs synchronously and pauses with `Thread.sleep`; when it is
/// invoked directly from a button action it presumably blocks the main thread, so the
/// "Searching..." status may never be rendered — confirm and consider moving the work
/// off the main thread.
private func findAllTestImages() {
    guard !testImagePath.isEmpty else {
        imageRecognitionResult = "Please create a test image first"
        return
    }

    imageRecognitionResult = "Searching for all test images..."

    // Find all instances of the test image
    let allMatches = SwiftAutoGUI.locateAllOnScreen(testImagePath, confidence: 0.8)

    guard !allMatches.isEmpty else {
        imageRecognitionResult = "No test images found on screen. Try opening multiple windows with the test image visible."
        return
    }

    // Build the report locally and publish it once, instead of mutating view state per match.
    var report = "Found \(allMatches.count) instances of the test image:\n"

    for (index, rect) in allMatches.enumerated() {
        report += "\n[\(index + 1)] at x=\(Int(rect.origin.x)), y=\(Int(rect.origin.y)), size=\(Int(rect.width))x\(Int(rect.height))"

        // Closed outline of the match: four corners, then back to the start.
        let outline = [
            CGPoint(x: rect.minX, y: rect.minY),
            CGPoint(x: rect.maxX, y: rect.minY),
            CGPoint(x: rect.maxX, y: rect.maxY),
            CGPoint(x: rect.minX, y: rect.maxY),
            CGPoint(x: rect.minX, y: rect.minY)
        ]

        // Visit each corner exactly once; moving to both ends of every edge would
        // revisit the shared corners and double the delay.
        for corner in outline {
            SwiftAutoGUI.move(to: corner)
            Thread.sleep(forTimeInterval: 0.1)
        }
    }

    // Move to center of the first match
    if let firstMatch = allMatches.first {
        SwiftAutoGUI.move(to: CGPoint(x: firstMatch.midX, y: firstMatch.midY))
    }

    report += "\n\nHighlighted all \(allMatches.count) matches!"
    imageRecognitionResult = report
}
283333
}
284334

285335
struct ContentView_Previews: PreviewProvider {

Sources/SwiftAutoGUI/ImageRecognition.swift

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,66 @@ extension SwiftAutoGUI {
120120
return CGPoint(x: rect.midX, y: rect.midY)
121121
}
122122

123+
/// Finds every occurrence of an image on the screen.
///
/// The image at `imagePath` is loaded as the template, a screenshot is captured
/// (of `region` when given, otherwise of the whole screen), and the pair is handed
/// to OpenCV template matching. Non-maximum suppression is applied so the same
/// object is not reported more than once.
///
/// - Parameters:
///   - imagePath: Path to the image file to search for.
///   - grayscale: Reserved for a future grayscale fast path; currently ignored.
///   - confidence: Matching threshold (0.0-1.0). When `nil`, a threshold of 0.95 is used.
///   - region: Restricts the search to this screen region. When `nil`, the entire screen is searched.
/// - Returns: One `CGRect` (x, y, width, height) per match, or an empty array when
///   nothing matches, the image cannot be loaded, or the screen cannot be captured.
///
/// Example:
/// ```swift
/// // Click every button that looks like button.png
/// for button in SwiftAutoGUI.locateAllOnScreen("button.png") {
///     SwiftAutoGUI.move(to: CGPoint(x: button.midX, y: button.midY))
///     SwiftAutoGUI.leftClick()
/// }
///
/// // Looser matching, limited to part of the screen
/// let searchRegion = CGRect(x: 0, y: 0, width: 800, height: 600)
/// let icons = SwiftAutoGUI.locateAllOnScreen("icon.png", confidence: 0.85, region: searchRegion)
/// ```
public static func locateAllOnScreen(
    _ imagePath: String,
    grayscale: Bool = false,
    confidence: Double? = nil,
    region: CGRect? = nil
) -> [CGRect] {
    // The template must load before anything is captured.
    guard let template = NSImage(contentsOfFile: imagePath) else {
        print("SwiftAutoGUI: Could not load image from path: \(imagePath)")
        return []
    }

    // Capture only the requested area when one was supplied.
    let capture: NSImage?
    switch region {
    case .some(let area):
        capture = self.screenshot(region: area)
    case .none:
        capture = self.screenshot()
    }

    guard let screenImage = capture else {
        print("SwiftAutoGUI: Could not capture screenshot")
        return []
    }

    return findAllImagesInImage(
        needle: template,
        haystack: screenImage,
        confidence: confidence,
        searchRegion: region
    )
}
182+
123183
// MARK: Private Helper Methods
124184

125185
/// Find needle image within haystack image using OpenCV template matching
@@ -204,6 +264,124 @@ extension SwiftAutoGUI {
204264

205265
return nil
206266
}
267+
268+
/// Find all instances of needle image within haystack image using OpenCV template matching.
///
/// Matching uses `TM_CCOEFF_NORMED`. Every location whose score reaches the threshold is a
/// candidate; candidates are accepted from best to worst, and each accepted match suppresses
/// a square neighbourhood (half the smaller template side) so one object is reported once.
///
/// - Parameters:
///   - needle: Template image to look for.
///   - haystack: Image to search in.
///   - confidence: Score threshold; `nil` means 0.95.
///   - searchRegion: Optional sub-rectangle of `haystack` to search; its origin is added
///     back to every match.
/// - Returns: Match rectangles divided by the main screen's backing scale factor, ordered
///   from highest to lowest score. Empty when conversion fails, the region or template does
///   not fit, or nothing reaches the threshold.
private static func findAllImagesInImage(
    needle: NSImage,
    haystack: NSImage,
    confidence: Double?,
    searchRegion: CGRect?
) -> [CGRect] {
    // Convert NSImages to OpenCV Mat format
    guard let needleMat = needle.toMat(),
          let haystackMat = haystack.toMat() else {
        print("SwiftAutoGUI: Could not convert images to OpenCV Mat")
        return []
    }

    // Apply search region if specified
    let searchMat: Mat
    let regionOffset: CGPoint

    if let region = searchRegion {
        let originX = Int32(region.origin.x)
        let originY = Int32(region.origin.y)
        let regionWidth = Int32(region.width)
        let regionHeight = Int32(region.height)

        // An out-of-bounds ROI makes OpenCV assert, so reject it up front.
        // NOTE(review): the caller already captures the screenshot with this same region;
        // if that capture is pre-cropped, cropping again here is wrong for non-zero
        // origins — confirm against `screenshot(region:)`.
        guard originX >= 0, originY >= 0, regionWidth > 0, regionHeight > 0,
              Int(originX) + Int(regionWidth) <= Int(haystackMat.cols()),
              Int(originY) + Int(regionHeight) <= Int(haystackMat.rows()) else {
            print("SwiftAutoGUI: Search region does not fit inside the captured image")
            return []
        }

        let rect = Rect2i(x: originX, y: originY, width: regionWidth, height: regionHeight)
        searchMat = Mat(mat: haystackMat, rect: rect)
        regionOffset = region.origin
    } else {
        searchMat = haystackMat
        regionOffset = .zero
    }

    let templateWidth = Int(needleMat.cols())
    let templateHeight = Int(needleMat.rows())

    // matchTemplate requires the template to fit inside the searched image.
    guard templateWidth > 0, templateHeight > 0,
          templateWidth <= Int(searchMat.cols()),
          templateHeight <= Int(searchMat.rows()) else {
        print("SwiftAutoGUI: Needle image is empty or larger than the search area")
        return []
    }

    // Perform template matching using OpenCV
    let result = Mat()
    Imgproc.matchTemplate(
        image: searchMat,
        templ: needleMat,
        result: result,
        method: TemplateMatchModes.TM_CCOEFF_NORMED // Normalized correlation coefficient
    )

    let threshold = confidence ?? 0.95
    let resultRows = Int(result.rows())
    let resultCols = Int(result.cols())

    // Copy the score map out of the Mat; a failed read must not be mistaken for "all zeros".
    var resultData = [Float](repeating: 0, count: resultRows * resultCols)
    do {
        _ = try result.get(row: 0, col: 0, data: &resultData)
    } catch {
        print("SwiftAutoGUI: Could not read template matching result: \(error)")
        return []
    }
    let scores = resultData

    // Fall back to a scale of 1 when no screen is attached instead of indexing an empty array.
    let scaleFactor = (NSScreen.main ?? NSScreen.screens.first)?.backingScaleFactor ?? 1.0

    // Collect every location at or above the threshold once, best score first.
    // Ties keep the lower index first, matching a first-maximum scan.
    let cutoff = Float(threshold)
    let candidates = scores.indices
        .filter { scores[$0] > -1 && scores[$0] >= cutoff }
        .sorted { lhs, rhs in
            scores[lhs] != scores[rhs] ? scores[lhs] > scores[rhs] : lhs < rhs
        }

    // Non-maximum suppression: once a match is accepted, nearby locations are
    // marked so the same object is not detected multiple times.
    let suppressionRadius = min(templateWidth, templateHeight) / 2
    var suppressed = [Bool](repeating: false, count: scores.count)
    var matches: [CGRect] = []

    for index in candidates where !suppressed[index] {
        // Calculate coordinates from the flat, row-major index
        let y = index / resultCols
        let x = index % resultCols

        // Offset by the region origin, then divide by the backing scale factor.
        // NOTE(review): `regionOffset` comes straight from the caller's region and is
        // scaled together with the pixel coordinates — confirm both use the same unit.
        let pointRect = CGRect(
            x: (CGFloat(x) + regionOffset.x) / scaleFactor,
            y: (CGFloat(y) + regionOffset.y) / scaleFactor,
            width: CGFloat(templateWidth) / scaleFactor,
            height: CGFloat(templateHeight) / scaleFactor
        )
        matches.append(pointRect)

        let rowRange = max(0, y - suppressionRadius)...min(resultRows - 1, y + suppressionRadius)
        let colRange = max(0, x - suppressionRadius)...min(resultCols - 1, x + suppressionRadius)
        for row in rowRange {
            let rowStart = row * resultCols
            for col in colRange {
                suppressed[rowStart + col] = true
            }
        }
    }

    print("SwiftAutoGUI: Found \(matches.count) matches with confidence >= \(threshold)")
    return matches
}
207385
}
208386

209387
// MARK: - NSImage to OpenCV Mat conversion

Tests/SwiftAutoGUITests/ImageRecognitionTests.swift

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,106 @@ struct ImageRecognitionTests {
136136
#expect(true)
137137
}
138138

139+
/// A loadable image must yield only well-formed rectangles.
/// The result may be empty: it depends on what is on screen and on capture permissions.
@Test("locateAllOnScreen with valid image path")
func testLocateAllOnScreenValidPath() {
    // Create a test image and make sure it is removed afterwards
    let testImagePath = createTestImage()
    defer { try? FileManager.default.removeItem(atPath: testImagePath) }

    // Try to locate all instances of the image
    let results = SwiftAutoGUI.locateAllOnScreen(testImagePath)

    // Every reported match must have a real, positive size
    for rect in results {
        #expect(rect.width > 0)
        #expect(rect.height > 0)
    }
}
160+
161+
/// An image path that cannot be loaded must produce no matches.
@Test("locateAllOnScreen with invalid image path")
func testLocateAllOnScreenInvalidPath() {
    let matches = SwiftAutoGUI.locateAllOnScreen("/nonexistent/image.png")

    // Nothing can be found when the needle image never loaded
    #expect(matches.isEmpty)
}
168+
169+
/// Matches found while searching a region must overlap that region.
/// The result may be empty; in that case there is nothing to check.
@Test("locateAllOnScreen with region")
func testLocateAllOnScreenWithRegion() {
    // Create a test image and make sure it is removed afterwards
    let testImagePath = createTestImage()
    defer { try? FileManager.default.removeItem(atPath: testImagePath) }

    // Search in a specific region
    let region = CGRect(x: 0, y: 0, width: 400, height: 400)
    let results = SwiftAutoGUI.locateAllOnScreen(testImagePath, region: region)

    // Each found rectangle should at least partially overlap the search region
    for rect in results {
        #expect(region.intersects(rect))
    }
}
188+
189+
/// Lowering the confidence threshold must never reduce the number of matches.
/// Both searches may come back empty when the screen cannot be captured.
@Test("locateAllOnScreen with confidence parameter")
func testLocateAllOnScreenWithConfidence() {
    // Create a test image and make sure it is removed afterwards
    let testImagePath = createTestImage()
    defer { try? FileManager.default.removeItem(atPath: testImagePath) }

    // Search with different confidence levels
    let highConfidenceResults = SwiftAutoGUI.locateAllOnScreen(testImagePath, confidence: 0.95)
    let lowConfidenceResults = SwiftAutoGUI.locateAllOnScreen(testImagePath, confidence: 0.5)

    // Lower confidence should find the same number of matches or more
    #expect(highConfidenceResults.count <= lowConfidenceResults.count)
}
206+
207+
/// Non-maximum suppression should keep reported matches from overlapping heavily:
/// any two results may share less than 50% of the smaller rectangle's area.
@Test("locateAllOnScreen returns non-overlapping results")
func testLocateAllOnScreenNonOverlapping() {
    // Create a test image and make sure it is removed afterwards
    let testImagePath = createTestImage()
    defer { try? FileManager.default.removeItem(atPath: testImagePath) }

    // Find all instances
    let results = SwiftAutoGUI.locateAllOnScreen(testImagePath, confidence: 0.8)

    // Compare every unordered pair of results exactly once
    for (index, first) in results.enumerated() {
        for second in results.dropFirst(index + 1) {
            // Disjoint rectangles intersect in a zero-sized rect, giving zero overlap
            let intersection = first.intersection(second)
            let overlap = intersection.width * intersection.height
            let minArea = min(first.width * first.height, second.width * second.height)

            // Skip degenerate rectangles to avoid dividing by zero
            if minArea > 0 {
                let overlapRatio = overlap / minArea
                #expect(overlapRatio < 0.5)
            }
        }
    }
}
238+
139239
// Helper function to create a test image
140240
private func createTestImage() -> String {
141241
let size = NSSize(width: 50, height: 50)

0 commit comments

Comments
 (0)