VOOZH about

URL: https://dev.to/yushulx/build-a-swiftui-ios-document-scanner-with-stable-auto-capture-and-pdf-export-ie9

⇱ Build a SwiftUI iOS Document Scanner with Stable Auto Capture and PDF Export - DEV Community


This project builds a SwiftUI document scanner for iPhone and iPad that captures pages from the camera, stabilizes the detected document quad, and lets users review, edit, and export the final scan set. It uses Dynamsoft Capture Vision on iOS, so the same camera feed powers auto capture, manual fallback capture, gallery import, and deskewed export.

What you'll build: A SwiftUI iOS document scanner with live camera capture, page review, crop adjustment, and PDF or JPEG export using Dynamsoft Capture Vision.

Demo Video: Live document capture, page review, crop editing, and export on iPhone

Prerequisites

  • Xcode 16 or later
  • iOS 16 or later
  • A valid Dynamsoft Capture Vision license key
  • A Mac with SwiftPM package resolution enabled in Xcode

Get a 30-day free trial license

Step 1: Install and Configure the SDK

The project depends on the Swift Package Manager package capture-vision-spm, which provides the Capture Vision products used by the scanner. The Xcode project references the package with an up-to-next-major version requirement starting at 3.4.1200, and the app initializes the SDK license during launch.

/* Begin XCRemoteSwiftPackageReference section */
 /* Remote Swift package that supplies the Capture Vision products used by the app. */
 C30000000000000000000001 /* XCRemoteSwiftPackageReference "capture-vision-spm" */ = {
 isa = XCRemoteSwiftPackageReference;
 repositoryURL = "https://github.com/Dynamsoft/capture-vision-spm";
 requirement = {
 /* Version range: minimumVersion up to (not including) the next major release. */
 kind = upToNextMajorVersion;
 minimumVersion = 3.4.1200;
 };
 };
/* End XCRemoteSwiftPackageReference section */
import SwiftUI
import DynamsoftCaptureVisionBundle

/// Application entry point: registers the Dynamsoft license and installs the shared
/// `DocumentScannerStore` into the SwiftUI environment.
@main
struct DynamsoftDocumentScannerApp: App {
    /// Single store instance handed to every view through the environment.
    @StateObject private var store = DocumentScannerStore()

    init() {
        Self.activateLicense()
    }

    var body: some Scene {
        WindowGroup {
            ContentView().environmentObject(store)
        }
    }

    /// Passes the license key to the SDK.
    ///
    /// No verification delegate is registered, so the result of the license check is not
    /// observed by this app.
    private static func activateLicense() {
        LicenseManager.initLicense("LICENSE-KEY", verificationDelegate: nil)
    }
}

Step 2: Stabilize Document Detection from the Camera Feed

The scanner view creates a CameraView, attaches a CameraEnhancer, and starts the DetectAndNormalizeDocument_Default template. It also keeps a short cooldown window so the same document is not captured repeatedly.

import SwiftUI
import UIKit
import DynamsoftCaptureVisionBundle

/// Bridges the Dynamsoft camera preview into SwiftUI and forwards detected pages to the store.
///
/// `manualCaptureToken` acts as a trigger: the coordinator requests a manual capture whenever
/// the value it receives differs from the last one it saw.
struct CameraScannerView: UIViewControllerRepresentable {
    @EnvironmentObject private var store: DocumentScannerStore

    let manualCaptureToken: Int
    let settings: AutoCaptureSettings

    func makeCoordinator() -> Coordinator {
        Coordinator(store: store, settings: settings)
    }

    func makeUIViewController(context: Context) -> UIViewController {
        context.coordinator.makeViewController()
    }

    func updateUIViewController(_ uiViewController: UIViewController, context: Context) {
        context.coordinator.update(settings: settings, manualCaptureToken: manualCaptureToken)
    }

    static func dismantleUIViewController(_ uiViewController: UIViewController, coordinator: Coordinator) {
        coordinator.stop()
    }

    /// Owns the capture pipeline (camera, router, stabilizer) behind one `CameraScannerView`.
    ///
    /// All mutable state below is confined to the main thread: SwiftUI drives `update` and
    /// `stop` from there, and `onProcessedDocumentResultReceived` re-dispatches to the main
    /// queue before reading or writing any of it.
    ///
    /// NOTE(review): `commit(_:autoCaptured:)`, `captureFallbackFrameIfNeeded()`,
    /// `cloneQuadrilateral(_:)` and `detectAndNormalizeTemplateName` are used here but are
    /// declared outside this excerpt.
    final class Coordinator: NSObject, CapturedResultReceiver {
        /// Snapshot of one processed frame, detached from the SDK result object.
        private struct CaptureCandidate {
            let originalImageData: ImageData?
            let normalizedImageData: ImageData
            let quad: Quadrilateral?
            let crossVerified: Bool
        }

        private unowned let store: DocumentScannerStore
        private let router = CaptureVisionRouter()
        private let stabilizer: QuadStabilizer

        private var cameraEnhancer: CameraEnhancer?
        /// Most recent candidate seen since the last manual-capture request.
        private var latestCandidate: CaptureCandidate?
        private var cooldown = false
        /// True between a manual-capture request and the frame (or fallback) that satisfies it.
        private var awaitingManualCapture = false
        private var lastManualCaptureToken = 0
        private var fallbackWorkItem: DispatchWorkItem?
        /// False once `stop()` has run, so results still queued for the main thread are dropped.
        private var isRunning = false

        init(store: DocumentScannerStore, settings: AutoCaptureSettings) {
            self.store = store
            self.stabilizer = QuadStabilizer(settings: settings)
            super.init()
        }

        /// Builds the hosting controller, pins the camera preview edge-to-edge, wires the
        /// router, and starts capturing.
        func makeViewController() -> UIViewController {
            let controller = UIViewController()
            controller.view.backgroundColor = .black

            let cameraView = CameraView(frame: .zero)
            cameraView.translatesAutoresizingMaskIntoConstraints = false
            cameraView.scanRegionMaskVisible = false
            cameraView.torchButtonVisible = false
            controller.view.addSubview(cameraView)

            NSLayoutConstraint.activate([
                cameraView.leadingAnchor.constraint(equalTo: controller.view.leadingAnchor),
                cameraView.trailingAnchor.constraint(equalTo: controller.view.trailingAnchor),
                cameraView.topAnchor.constraint(equalTo: controller.view.topAnchor),
                cameraView.bottomAnchor.constraint(equalTo: controller.view.bottomAnchor)
            ])

            cameraEnhancer = CameraEnhancer(view: cameraView)

            if let cameraEnhancer {
                cameraEnhancer.setResolution(.resolution1080P)
                do {
                    try router.setInput(cameraEnhancer)
                } catch {
                    // Without an input the router has nothing to process; tell the user why.
                    report(error.localizedDescription)
                }
            }

            configureTemplate()
            router.addResultReceiver(self)
            start()

            return controller
        }

        /// Applies new stabilizer settings and requests a manual capture when the token changed.
        func update(settings: AutoCaptureSettings, manualCaptureToken: Int) {
            stabilizer.settings = settings
            if lastManualCaptureToken != manualCaptureToken {
                lastManualCaptureToken = manualCaptureToken
                requestManualCapture()
            }
        }

        /// Opens the camera and starts the detect-and-normalize template; a start failure is
        /// surfaced through `store.errorMessage`.
        func start() {
            isRunning = true
            cameraEnhancer?.open()
            router.startCapturing(detectAndNormalizeTemplateName) { [weak self] isSuccess, error in
                guard let self, !isSuccess else { return }
                self.report(error?.localizedDescription ?? "Failed to start capturing.")
            }
        }

        /// Tears the pipeline down and clears every pending capture request so nothing fires
        /// after the view has been dismantled.
        func stop() {
            isRunning = false
            fallbackWorkItem?.cancel()
            fallbackWorkItem = nil
            awaitingManualCapture = false
            latestCandidate = nil
            stabilizer.reset()
            router.stopCapturing()
            router.removeAllResultReceivers()
            cameraEnhancer?.close()
        }

        /// Best-effort tuning of the template; when the settings cannot be read or written the
        /// template keeps whatever configuration it already had.
        private func configureTemplate() {
            guard let settings = try? router.getSimplifiedSettings(detectAndNormalizeTemplateName) else {
                return
            }

            settings.outputOriginalImage = true
            settings.minImageCaptureInterval = 0
            settings.timeout = 3000
            settings.maxParallelTasks = 1
            settings.documentSettings?.expectedDocumentsCount = 1
            _ = try? router.updateSettings(detectAndNormalizeTemplateName, settings: settings)
        }

        /// Arms a manual capture: the next processed frame is committed, and if none arrives
        /// within 0.5 s the fallback path runs instead.
        private func requestManualCapture() {
            guard !cooldown else { return }

            fallbackWorkItem?.cancel()
            latestCandidate = nil
            awaitingManualCapture = true

            let workItem = DispatchWorkItem { [weak self] in
                self?.captureFallbackFrameIfNeeded()
            }
            fallbackWorkItem = workItem
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.5, execute: workItem)
        }

        /// SDK callback for each processed document frame.
        ///
        /// Everything that depends on the SDK result is resolved inside the callback; the
        /// state machine itself then runs on the main thread.
        /// NOTE(review): the SDK is presumed to deliver results off the main thread (its
        /// samples hop to the main queue before touching UI) — confirm against the SDK docs.
        func onProcessedDocumentResultReceived(_ result: ProcessedDocumentResult) {
            guard let item = result.deskewedImageResultItems?.first,
                  let normalizedImageData = item.imageData else {
                return
            }

            let originalImageData = router.getIntermediateResultManager().getOriginalImage(result.originalImageHashId)
            let candidate = CaptureCandidate(
                originalImageData: originalImageData,
                normalizedImageData: normalizedImageData,
                quad: cloneQuadrilateral(item.sourceDeskewQuad),
                // NOTE(review): raw value 1 is presumably the "passed" status — confirm.
                crossVerified: item.crossVerificationStatus.rawValue == 1
            )

            if Thread.isMainThread {
                handle(candidate)
            } else {
                DispatchQueue.main.async { [weak self] in
                    self?.handle(candidate)
                }
            }
        }

        /// Main-thread half of result handling: satisfies a pending manual capture first,
        /// otherwise feeds cross-verified quads to the stabilizer for auto capture.
        private func handle(_ candidate: CaptureCandidate) {
            guard isRunning else { return }

            latestCandidate = candidate

            if awaitingManualCapture {
                awaitingManualCapture = false
                fallbackWorkItem?.cancel()
                fallbackWorkItem = nil
                commit(candidate, autoCaptured: false)
                return
            }

            guard candidate.crossVerified, let quad = candidate.quad else { return }
            if stabilizer.feed(quad) {
                commit(candidate, autoCaptured: true)
            }
        }

        /// Publishes an error message on the main actor.
        private func report(_ message: String) {
            Task { @MainActor in
                self.store.errorMessage = message
            }
        }
    }
}

Step 3: Review, Adjust, and Export Scans

The store keeps the page list, selected page, color mode, and export payloads in one place. It also handles rotation, retake, reordering, PDF generation, and image export for the final scan batch.

import Foundation
import SwiftUI
import UIKit
import DynamsoftCaptureVisionBundle

/// Main-actor state shared by the scanner, results, and settings screens: the captured pages,
/// the current selection and route, persisted auto-capture settings, and transient UI flags.
@MainActor
final class DocumentScannerStore: ObservableObject {
    @Published var route: ScannerRoute = .scanner
    @Published var pages: [ScannedPage] = []
    @Published var selectedPageIndex: Int = 0
    /// Index of the page being replaced while a retake is in progress, otherwise `nil`.
    @Published var retakePageIndex: Int? = nil
    /// Settings are written back through `persist()` on every change.
    @Published var autoCaptureSettings = AutoCaptureSettings.load() {
        didSet {
            autoCaptureSettings.persist()
        }
    }
    @Published var autoCaptureFlashVisible = false
    @Published var showSettings = false
    @Published var editorTarget: EditorTarget? = nil
    @Published var sharePayload: SharePayload? = nil
    @Published var errorMessage: String? = nil

    /// Pending dismissal of the "auto captured" banner; replaced whenever a new capture lands.
    private var flashDismissTask: Task<Void, Never>?

    /// Stores a freshly captured page.
    ///
    /// During a retake the page replaces the one at `retakePageIndex` and the route returns
    /// to the results screen; otherwise the page is appended and selected.
    /// - Parameters:
    ///   - page: The page to store.
    ///   - autoCaptured: When `true`, the auto-capture banner is shown for 1.2 s.
    func integrateCapturedPage(_ page: ScannedPage, autoCaptured: Bool) {
        if let retakePageIndex, pages.indices.contains(retakePageIndex) {
            pages[retakePageIndex] = page
            selectedPageIndex = retakePageIndex
            self.retakePageIndex = nil
            route = .results
        } else {
            pages.append(page)
            selectedPageIndex = max(0, pages.count - 1)
        }

        if autoCaptured {
            showAutoCaptureFlash()
        }
    }

    /// Rotates the selected page by a further quarter turn, wrapping after four.
    func rotateSelectedPage() {
        guard pages.indices.contains(selectedPageIndex) else { return }
        pages[selectedPageIndex].rotationQuarterTurns = (pages[selectedPageIndex].rotationQuarterTurns + 1) % 4
    }

    /// Renders every page into a PDF in the temporary directory, one PDF page per image sized
    /// to that image, and publishes the file URL through `sharePayload`.
    ///
    /// Pages whose image cannot be rendered are skipped; nothing happens when no page renders.
    /// Write failures are reported through `errorMessage`.
    func exportPDF() {
        let images = pages.compactMap { $0.renderedImage() }
        guard let firstImage = images.first else { return }

        let exportURL = FileManager.default.temporaryDirectory
            .appendingPathComponent("dynamsoft-scan-\(UUID().uuidString)")
            .appendingPathExtension("pdf")

        // The renderer needs default bounds; each page overrides them below.
        let firstBounds = CGRect(origin: .zero, size: firstImage.size)
        let renderer = UIGraphicsPDFRenderer(bounds: firstBounds)

        do {
            try renderer.writePDF(to: exportURL) { context in
                for image in images {
                    let bounds = CGRect(origin: .zero, size: image.size)
                    context.beginPage(withBounds: bounds, pageInfo: [:])
                    image.draw(in: bounds)
                }
            }
            sharePayload = SharePayload(items: [exportURL])
        } catch {
            errorMessage = error.localizedDescription
        }
    }

    /// Shows the auto-capture banner and schedules its dismissal.
    ///
    /// Any earlier dismissal is cancelled first so that a capture arriving while the banner
    /// is already visible gets the full display time instead of being hidden by the older timer.
    private func showAutoCaptureFlash() {
        flashDismissTask?.cancel()
        autoCaptureFlashVisible = true
        flashDismissTask = Task { @MainActor [weak self] in
            do {
                try await Task.sleep(nanoseconds: 1_200_000_000)
            } catch {
                return
            }
            // Cancellation may land between the sleep finishing and this line running.
            guard !Task.isCancelled else { return }
            self?.autoCaptureFlashVisible = false
        }
    }
}
import SwiftUI
import UIKit
import DynamsoftCaptureVisionBundle

/// Sheet for tuning the auto-capture stabilizer. Every control binds directly to
/// `store.autoCaptureSettings`.
struct AutoCaptureSettingsSheet: View {
    @EnvironmentObject private var store: DocumentScannerStore

    var body: some View {
        NavigationStack {
            Form {
                Toggle("Enable auto capture", isOn: $store.autoCaptureSettings.autoCaptureEnabled)

                settingRow(
                    "IoU threshold",
                    value: Text(store.autoCaptureSettings.iouThreshold.formatted(.number.precision(.fractionLength(2))))
                ) {
                    Slider(value: $store.autoCaptureSettings.iouThreshold, in: 0.5...0.98)
                }

                settingRow(
                    "Area delta threshold",
                    value: Text(store.autoCaptureSettings.areaDeltaThreshold.formatted(.number.precision(.fractionLength(2))))
                ) {
                    Slider(value: $store.autoCaptureSettings.areaDeltaThreshold, in: 0.02...0.30)
                }

                settingRow(
                    "Stable frame count",
                    value: Text("\(store.autoCaptureSettings.stableFrameCount)")
                ) {
                    // The row header already shows the title, so the stepper carries no label.
                    Stepper(value: $store.autoCaptureSettings.stableFrameCount, in: 1...8) {
                        EmptyView()
                    }
                }
            }
            .navigationTitle("Stabilization")
            .toolbar {
                ToolbarItem(placement: .confirmationAction) {
                    Button("Done") {
                        store.showSettings = false
                    }
                }
            }
        }
    }

    /// Lays out one setting: a title with its current value on the first line and the editing
    /// control underneath.
    private func settingRow<Control: View>(
        _ title: LocalizedStringKey,
        value: Text,
        @ViewBuilder control: () -> Control
    ) -> some View {
        VStack(alignment: .leading, spacing: 8) {
            HStack {
                Text(title)
                Spacer()
                value
                    .foregroundStyle(.secondary)
            }
            control()
        }
    }
}

/// Modal crop editor for a single page.
///
/// "Apply" reports the quad currently held by the editing session through `onApply`; when the
/// session has no quad it behaves like "Cancel".
struct QuadEditorSheet: View {
    let page: ScannedPage
    let onApply: (Quadrilateral) -> Void
    let onCancel: () -> Void

    @StateObject private var session = QuadEditorSession()

    var body: some View {
        VStack(spacing: 0) {
            titleBar
            editorArea
        }
    }

    /// Cancel / title / Apply bar shown above the editor.
    private var titleBar: some View {
        HStack {
            Button("Cancel", action: onCancel)

            Spacer()

            Text("Adjust Crop")
                .font(.system(size: 18, weight: .bold, design: .rounded))

            Spacer()

            Button("Apply", action: applyCurrentQuad)
                .fontWeight(.semibold)
        }
        .padding(.horizontal, 18)
        .padding(.vertical, 14)
        .background(.thinMaterial)
    }

    /// The interactive editor when the page has a source image, otherwise an explanatory
    /// message centred in the remaining space.
    @ViewBuilder
    private var editorArea: some View {
        if page.originalImageData != nil {
            QuadEditorRepresentable(page: page, session: session)
                .ignoresSafeArea(edges: .bottom)
        } else {
            Spacer()
            Text("This page does not have an editable source image.")
            Spacer()
        }
    }

    /// Forwards the session's quad to `onApply`, or cancels when there is none.
    private func applyCurrentQuad() {
        guard let quad = session.currentQuad() else {
            onCancel()
            return
        }
        onApply(quad)
    }
}

Step 4: Import Photos and Deskew Existing Images

The app also supports gallery imports. If Capture Vision finds a document in the imported image, the store saves the deskewed image and its quad; otherwise it keeps the original image as a fallback page.

 /// Imports a photo picked from the library as a new page.
 ///
 /// When document detection succeeds the page stores the deskewed image together with its
 /// source quad; otherwise the decoded photo itself is kept as a fallback page.
 /// An undecodable payload is reported through `errorMessage` and nothing is added.
 /// - Parameter data: Encoded image bytes from the photo picker.
 func importPhotoData(_ data: Data) {
     guard let decodedImage = UIImage(data: data)?.normalizedOrientationImage() else {
         errorMessage = "Unable to decode the selected image."
         return
     }

     // NOTE(review): the source image is read from the raw bytes while detection runs on the
     // orientation-normalized UIImage — confirm both share one coordinate space for the quad.
     let sourceImageData = try? ImageIO().read(fromMemory: data)
     let detection = importRouter.captureFromImage(decodedImage, templateName: detectAndNormalizeTemplateName)

     let importedPage: ScannedPage
     if let deskewedItem = detection.processedDocumentResult?.deskewedImageResultItems?.first,
        let deskewedImageData = deskewedItem.imageData {
         importedPage = ScannedPage(
             originalImageData: sourceImageData,
             normalizedImageData: deskewedImageData,
             quad: cloneQuadrilateral(deskewedItem.sourceDeskewQuad),
             fallbackImage: nil
         )
     } else {
         importedPage = ScannedPage(
             originalImageData: nil,
             normalizedImageData: nil,
             quad: nil,
             fallbackImage: decodedImage
         )
     }

     integrateCapturedPage(importedPage, autoCaptured: false)
 }

Step 5: Trigger Auto Capture Only After the Document Quad Stabilizes

The scanner does not commit a page on the first detection. Instead, it compares each new quadrilateral against the previous one and only auto-captures when the overlap and area change stay within the configured thresholds for several consecutive frames.

import Foundation
import DynamsoftCaptureVisionBundle

/// Decides when a detected document quad has been steady long enough to auto-capture.
///
/// Each quad is compared with the one from the previous call; a frame counts as stable when
/// the overlap is at least `settings.iouThreshold` and the relative area change is at most
/// `settings.areaDeltaThreshold`.
final class QuadStabilizer {
    var settings: AutoCaptureSettings

    private var previousQuad: Quadrilateral?
    private var consecutiveStableFrames = 0

    init(settings: AutoCaptureSettings) {
        self.settings = settings
    }

    /// Forgets the previous quad and the stable-frame streak.
    func reset() {
        previousQuad = nil
        consecutiveStableFrames = 0
    }

    /// Feeds the quad from the latest frame.
    ///
    /// - Parameter quad: Quad detected in the current frame.
    /// - Returns: `true` once `settings.stableFrameCount` consecutive stable comparisons have
    ///   been observed; the history is cleared at that point so the next capture starts fresh.
    func feed(_ quad: Quadrilateral) -> Bool {
        guard settings.autoCaptureEnabled else {
            // Drop history while disabled so that re-enabling cannot fire on a streak that
            // was counted before the toggle.
            reset()
            return false
        }

        guard let previousQuad else {
            // First frame of a run: nothing to compare against yet.
            self.previousQuad = cloneQuadrilateral(quad)
            consecutiveStableFrames = 0
            return false
        }

        let iou = quadrilateralIoU(previousQuad, quad)
        let previousArea = quadrilateralArea(previousQuad)
        let currentArea = quadrilateralArea(quad)
        // A degenerate previous quad is treated as a 100 % change.
        let areaDelta = previousArea > 0 ? abs(currentArea - previousArea) / previousArea : 1

        if iou >= settings.iouThreshold && areaDelta <= settings.areaDeltaThreshold {
            consecutiveStableFrames += 1
            if consecutiveStableFrames >= settings.stableFrameCount {
                reset()
                return true
            }
        } else {
            consecutiveStableFrames = 0
        }

        self.previousQuad = cloneQuadrilateral(quad)
        return false
    }
}
import CoreGraphics
import DynamsoftCaptureVisionBundle

/// Intersection-over-union of the two quads' bounding rectangles.
///
/// The overlap is measured on the rectangles returned by `quadrilateralBounds`, not on the
/// quad outlines themselves.
/// - Returns: A ratio in `0...1`; `0` when the rectangles do not overlap or have no area.
func quadrilateralIoU(_ lhs: Quadrilateral, _ rhs: Quadrilateral) -> Double {
    let lhsBounds = quadrilateralBounds(lhs)
    let rhsBounds = quadrilateralBounds(rhs)
    let overlap = lhsBounds.intersection(rhsBounds)

    guard !overlap.isNull, overlap.width > 0, overlap.height > 0 else {
        return 0
    }

    let overlapArea = overlap.width * overlap.height
    let lhsArea = lhsBounds.width * lhsBounds.height
    let rhsArea = rhsBounds.width * rhsBounds.height
    let combinedArea = lhsArea + rhsArea - overlapArea

    if combinedArea > 0 {
        return Double(overlapArea / combinedArea)
    }
    return 0
}

/// Area enclosed by the quad's vertices, computed with the shoelace formula.
///
/// - Returns: The non-negative area, or `0` when fewer than four points are available.
func quadrilateralArea(_ quad: Quadrilateral) -> Double {
    let vertices = quadrilateralPoints(quad)
    if vertices.count < 4 {
        return 0
    }

    // Sum of cross products of each edge (vertex -> next vertex, wrapping at the end);
    // this equals twice the signed area.
    let twiceSignedArea = vertices.indices.reduce(0.0) { partial, position in
        let current = vertices[position]
        let following = vertices[(position + 1) % vertices.count]
        return partial + Double(current.x * following.y) - Double(following.x * current.y)
    }
    return abs(twiceSignedArea) / 2
}

Step 6: Build the Capture-to-Review Workflow in SwiftUI

The app keeps capture, review, and page ordering inside one SwiftUI state machine. ScannerScreen handles import, manual capture, and thumbnails, while ResultsScreen exposes export, retake, edit, rotate, and sort actions.

import SwiftUI
import PhotosUI

/// Capture screen: live camera preview, import / shutter / next controls, and a strip of
/// thumbnails for the pages captured so far.
struct ScannerScreen: View {
    @EnvironmentObject private var store: DocumentScannerStore
    /// Incremented on every shutter tap; `CameraScannerView` reacts to the change.
    @State private var manualCaptureToken = 0
    @State private var selectedPhotoItem: PhotosPickerItem?

    var body: some View {
        VStack(spacing: 18) {
            cameraPreview
            controlBar
            thumbnailStrip
        }
        .onChange(of: selectedPhotoItem) { newValue in
            // Clearing the selection after an import re-enters here with nil.
            guard let newValue else { return }
            importPhoto(newValue)
        }
    }

    /// Camera feed with the transient "Auto captured" banner overlaid at the top-leading corner.
    private var cameraPreview: some View {
        ZStack(alignment: .topLeading) {
            CameraScannerView(manualCaptureToken: manualCaptureToken, settings: store.autoCaptureSettings)
                .frame(maxWidth: .infinity)
                .aspectRatio(3 / 4, contentMode: .fit)

            if store.autoCaptureFlashVisible {
                Text("Auto captured")
            }
        }
    }

    /// Import, shutter, and "Next" controls.
    private var controlBar: some View {
        HStack(spacing: 18) {
            PhotosPicker(selection: $selectedPhotoItem, matching: .images) {
                Label("Import", systemImage: "photo.on.rectangle.angled")
            }

            Spacer()

            Button {
                manualCaptureToken += 1
            } label: {
                Circle()
                    .fill(Color.white)
                    .frame(width: 86, height: 86)
            }
            // The shutter is a bare shape, so assistive technologies need an explicit name.
            .accessibilityLabel("Capture page")

            Spacer()

            Button {
                store.openResults()
            } label: {
                Label("Next", systemImage: "arrow.right")
            }
            .disabled(store.pages.isEmpty)
        }
    }

    /// Placeholder while there are no pages or a retake is in progress, otherwise a
    /// horizontally scrolling row of page thumbnails.
    private var thumbnailStrip: some View {
        VStack(alignment: .leading, spacing: 10) {
            if store.pages.isEmpty || store.retakePageIndex != nil {
                RoundedRectangle(cornerRadius: 22, style: .continuous)
                    .frame(height: 84)
            } else {
                ScrollView(.horizontal, showsIndicators: false) {
                    HStack(spacing: 12) {
                        ForEach(Array(store.pages.enumerated()), id: \.element.id) { index, page in
                            ThumbnailCard(page: page, index: index + 1) {
                                store.openResults(from: index)
                            } onRemove: {
                                store.removePage(id: page.id)
                            }
                        }
                    }
                }
            }
        }
    }

    /// Loads the picked photo's bytes, hands them to the store (or reports the failure), and
    /// clears the picker selection so the same photo can be chosen again.
    private func importPhoto(_ item: PhotosPickerItem) {
        Task {
            let data = try? await item.loadTransferable(type: Data.self)
            await MainActor.run {
                if let data {
                    store.importPhotoData(data)
                } else {
                    store.errorMessage = "Unable to load the selected photo."
                }
                selectedPhotoItem = nil
            }
        }
    }
}
/// Review screen: an export menu, a pager over the captured pages, and the per-page actions.
struct ResultsScreen: View {
    @EnvironmentObject private var store: DocumentScannerStore

    var body: some View {
        VStack(spacing: 18) {
            exportBar
            reviewContent
        }
    }

    /// Trailing-aligned menu offering PDF and image export.
    private var exportBar: some View {
        HStack(alignment: .top) {
            Spacer()

            Menu {
                Button("Export PDF") {
                    store.exportPDF()
                }
                Button("Export Images") {
                    store.exportImages()
                }
            } label: {
                Image(systemName: "square.and.arrow.up")
            }
        }
    }

    /// A hint when nothing has been captured, otherwise the pager followed by the action row.
    @ViewBuilder
    private var reviewContent: some View {
        if store.pages.isEmpty {
            Text("Capture a page before reviewing results.")
        } else {
            pagePager
            actionRow
        }
    }

    /// Pager whose selection is the store's `selectedPageIndex`; each page is tagged with its
    /// position in `store.pages`.
    private var pagePager: some View {
        TabView(selection: $store.selectedPageIndex) {
            ForEach(Array(store.pages.enumerated()), id: \.element.id) { index, page in
                ZoomablePageView(page: page)
                    .tag(index)
            }
        }
    }

    /// Continue / Retake / Edit / Rotate / Sort chips. "Edit" is disabled unless the current
    /// page reports that it can be edited.
    private var actionRow: some View {
        HStack(spacing: 12) {
            ActionChip(title: "Continue", systemImage: "plus.viewfinder") {
                store.continueScanning()
            }
            ActionChip(title: "Retake", systemImage: "camera.rotate") {
                store.startRetake()
            }
            ActionChip(title: "Edit", systemImage: "crop") {
                store.presentEditor()
            }
            .disabled(store.currentPage?.canEdit != true)
            ActionChip(title: "Rotate", systemImage: "rotate.right") {
                store.rotateSelectedPage()
            }
            ActionChip(title: "Sort", systemImage: "square.grid.2x2") {
                store.route = .sort
            }
        }
    }
}

Step 7: Render Final Pages in Color, Grayscale, or Binary

👁 iOS mobile scanner app for capturing multiple documents

Each page keeps either a normalized document image from Capture Vision or a fallback UIImage. The rendering pipeline converts grayscale and binary variants on demand, then rotates the preview or export output without mutating the original source.

/// One scanned page: either a normalized document image (optionally with its source image and
/// quad) or a plain fallback `UIImage`, plus the display options applied when rendering.
struct ScannedPage: Identifiable {
    let id = UUID()
    var originalImageData: ImageData?
    var normalizedImageData: ImageData?
    var quad: Quadrilateral?
    var fallbackImage: UIImage?
    var colorMode: DocumentColorMode = .color
    var rotationQuarterTurns: Int = 0

    /// A page is editable only when both the source image and its quad are present.
    var canEdit: Bool {
        originalImageData != nil && quad != nil
    }

    /// Produces the image to show or export: colour mode applied first, rotation second.
    ///
    /// The normalized image is preferred; the fallback image is used only when there is none.
    /// - Parameter processor: Converter used for the grayscale and binary variants.
    /// - Returns: The rendered image, or `nil` when the page has no image or the normalized
    ///   data cannot be converted to a `UIImage`.
    func renderedImage(processor: ImageProcessor = ImageProcessor()) -> UIImage? {
        let unrotated: UIImage?

        if let normalizedImageData {
            let toned = tonedImageData(from: normalizedImageData, using: processor)
            unrotated = try? toned.toUIImage()
        } else if let fallbackImage {
            unrotated = fallbackImage.processed(for: colorMode)
        } else {
            unrotated = nil
        }

        guard let unrotated else { return nil }
        return unrotated.rotated(quarterTurns: rotationQuarterTurns)
    }

    /// Applies `colorMode` to normalized image data; the binary variant is derived from the
    /// grayscale one.
    private func tonedImageData(from source: ImageData, using processor: ImageProcessor) -> ImageData {
        switch colorMode {
        case .color:
            return source
        case .grayscale:
            return processor.convert(toGray: source)
        case .binary:
            let grayscale = processor.convert(toGray: source)
            return processor.convert(toBinaryLocal: grayscale)
        }
    }
}
import UIKit

extension UIImage {
    /// Returns a copy rotated clockwise by the given number of quarter turns.
    ///
    /// Negative and out-of-range values wrap into `0...3`; a net rotation of zero returns
    /// `self` unchanged. The result is drawn with the receiver's own renderer format, so it
    /// keeps the source scale (and therefore its pixel dimensions) rather than being
    /// re-rasterised at the screen scale.
    /// - Parameter quarterTurns: Number of 90° steps to rotate by.
    func rotated(quarterTurns: Int) -> UIImage {
        let normalizedQuarterTurns = ((quarterTurns % 4) + 4) % 4
        guard normalizedQuarterTurns != 0 else { return self }

        let angle = CGFloat(normalizedQuarterTurns) * (.pi / 2)
        // Odd turn counts swap width and height; a half turn keeps the canvas as is.
        let rotatedSize = normalizedQuarterTurns.isMultiple(of: 2) ? size : CGSize(width: size.height, height: size.width)

        let renderer = UIGraphicsImageRenderer(size: rotatedSize, format: imageRendererFormat)
        return renderer.image { context in
            // Rotate about the canvas centre, then draw the image centred on the origin.
            context.cgContext.translateBy(x: rotatedSize.width / 2, y: rotatedSize.height / 2)
            context.cgContext.rotate(by: angle)
            draw(in: CGRect(x: -size.width / 2, y: -size.height / 2, width: size.width, height: size.height))
        }
    }

    /// Returns the image adjusted for the requested colour mode; `.color` returns `self`.
    func processed(for mode: DocumentColorMode) -> UIImage {
        switch mode {
        case .color:
            return self
        case .grayscale:
            return pixelProcessed(binary: false)
        case .binary:
            return pixelProcessed(binary: true)
        }
    }
}

Source Code

https://github.com/yushulx/ios-swiftui-barcode-mrz-document-scanner/tree/main/examples/DynamsoftDocumentScanner