azure-ai-document-intelligence-ts

Azure Document Intelligence REST SDK for TypeScript

Extract text, tables, and structured data from documents using prebuilt and custom models.

Installation

npm install @azure-rest/ai-document-intelligence @azure/identity

Environment Variables

DOCUMENT_INTELLIGENCE_ENDPOINT=https://<resource>.cognitiveservices.azure.com
DOCUMENT_INTELLIGENCE_API_KEY=<api-key>

Authentication

Important: This is a REST client. DocumentIntelligence is a function, not a class.

DefaultAzureCredential

import DocumentIntelligence from "@azure-rest/ai-document-intelligence"; import { DefaultAzureCredential } from "@azure/identity";

// DocumentIntelligence is a factory function: call it directly, without `new`.
const endpoint = process.env.DOCUMENT_INTELLIGENCE_ENDPOINT!;
const credential = new DefaultAzureCredential();
const client = DocumentIntelligence(endpoint, credential);

API Key

import DocumentIntelligence from "@azure-rest/ai-document-intelligence";

// Authenticate with the resource's API key instead of a token credential.
const endpoint = process.env.DOCUMENT_INTELLIGENCE_ENDPOINT!;
const apiKey = process.env.DOCUMENT_INTELLIGENCE_API_KEY!;
const client = DocumentIntelligence(endpoint, { key: apiKey });

Analyze Document (URL)

import DocumentIntelligence, { isUnexpected, getLongRunningPoller, AnalyzeOperationOutput } from "@azure-rest/ai-document-intelligence";

// Submit a publicly reachable document URL to the prebuilt layout model.
const initialResponse = await client
  .path("/documentModels/{modelId}:analyze", "prebuilt-layout")
  .post({
    contentType: "application/json",
    body: { urlSource: "https://example.com/document.pdf" },
    queryParameters: { locale: "en-US" },
  });

// Stop here if the service rejected the request.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Analysis is a long-running operation: poll until it finishes.
const poller = getLongRunningPoller(client, initialResponse);
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as AnalyzeOperationOutput;

const analyzeResult = result.analyzeResult;
console.log("Pages:", analyzeResult?.pages?.length);
console.log("Tables:", analyzeResult?.tables?.length);

Analyze Document (Local File)

import { readFile } from "node:fs/promises";

// Read the local file straight into a base64 string for the `base64Source` body field.
const base64Source = await readFile("./document.pdf", { encoding: "base64" });

const initialResponse = await client
  .path("/documentModels/{modelId}:analyze", "prebuilt-invoice")
  .post({
    contentType: "application/json",
    body: { base64Source },
  });

// Stop here if the service rejected the request.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Poll the long-running analysis until it completes.
const poller = getLongRunningPoller(client, initialResponse);
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as AnalyzeOperationOutput;

Prebuilt Models

| Model ID | Description |
| --- | --- |
| prebuilt-read | OCR - text and language extraction |
| prebuilt-layout | Text, tables, selection marks, structure |
| prebuilt-invoice | Invoice fields |
| prebuilt-receipt | Receipt fields |
| prebuilt-idDocument | ID document fields |
| prebuilt-tax.us.w2 | W-2 tax form fields |
| prebuilt-healthInsuranceCard.us | Health insurance card fields |
| prebuilt-contract | Contract fields |
| prebuilt-bankStatement.us | Bank statement fields |

Extract Invoice Fields

// Analyze the invoice at `invoiceUrl` with the prebuilt invoice model.
const initialResponse = await client
  .path("/documentModels/{modelId}:analyze", "prebuilt-invoice")
  .post({
    contentType: "application/json",
    body: { urlSource: invoiceUrl },
  });

// Stop here if the service rejected the request.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Poll the long-running analysis until it completes.
const poller = getLongRunningPoller(client, initialResponse);
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as AnalyzeOperationOutput;

// Only the first extracted document is inspected here.
const invoice = result.analyzeResult?.documents?.[0];
if (invoice) {
  const fields = invoice.fields;
  console.log("Vendor:", fields?.VendorName?.content);
  console.log("Total:", fields?.InvoiceTotal?.content);
  console.log("Due Date:", fields?.DueDate?.content);
}

Extract Receipt Fields

// Analyze the receipt at `receiptUrl` with the prebuilt receipt model.
const initialResponse = await client
  .path("/documentModels/{modelId}:analyze", "prebuilt-receipt")
  .post({
    contentType: "application/json",
    body: { urlSource: receiptUrl },
  });

// Fail fast on a rejected request before handing the response to the poller.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Poll the long-running analysis until it completes.
const poller = getLongRunningPoller(client, initialResponse);
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;

// Only the first extracted document is inspected here.
const receipt = result.analyzeResult?.documents?.[0];
if (receipt) {
  console.log("Merchant:", receipt.fields?.MerchantName?.content);
  console.log("Total:", receipt.fields?.Total?.content);

  // In the REST SDK, array fields carry their elements in `valueArray` and
  // object fields carry their sub-fields in `valueObject`.
  for (const item of receipt.fields?.Items?.valueArray ?? []) {
    console.log("Item:", item.valueObject?.Description?.content);
    console.log("Price:", item.valueObject?.TotalPrice?.content);
  }
}

List Document Models

import DocumentIntelligence, { isUnexpected, paginate } from "@azure-rest/ai-document-intelligence";

// Fetch the first page of document models available on the resource.
const response = await client.path("/documentModels").get();

// Stop here if the service rejected the request.
if (isUnexpected(response)) {
  throw response.body.error;
}

// `paginate` follows continuation links and yields one model at a time.
const models = paginate(client, response);
for await (const { modelId } of models) {
  console.log(modelId);
}

Build Custom Model

// Start building a custom model from labeled training data in Azure Blob Storage.
const initialResponse = await client.path("/documentModels:build").post({
  body: {
    modelId: "my-custom-model",
    description: "Custom model for purchase orders",
    // Build mode: "template" or "neural".
    buildMode: "template",
    azureBlobSource: {
      containerUrl: process.env.TRAINING_CONTAINER_SAS_URL!,
      prefix: "training-data/",
    },
  },
});

// Stop here if the service rejected the build request.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Building is a long-running operation: poll until it finishes.
const poller = getLongRunningPoller(client, initialResponse);
const result = await poller.pollUntilDone();
console.log("Model built:", result.body);

Build Document Classifier

import { DocumentClassifierBuildOperationDetailsOutput } from "@azure-rest/ai-document-intelligence";

// SAS URL of the blob container that holds the training documents.
const containerSasUrl = process.env.TRAINING_CONTAINER_SAS_URL!;

// Each document type is trained from its own folder prefix in the container.
const initialResponse = await client.path("/documentClassifiers:build").post({
  body: {
    classifierId: "my-classifier",
    description: "Invoice vs Receipt classifier",
    docTypes: {
      invoices: {
        azureBlobSource: { containerUrl: containerSasUrl, prefix: "invoices/" },
      },
      receipts: {
        azureBlobSource: { containerUrl: containerSasUrl, prefix: "receipts/" },
      },
    },
  },
});

// Stop here if the service rejected the build request.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Poll the long-running build until it completes.
const poller = getLongRunningPoller(client, initialResponse);
const finalResponse = await poller.pollUntilDone();
const result = finalResponse.body as DocumentClassifierBuildOperationDetailsOutput;
console.log("Classifier:", result.result?.classifierId);

Classify Document

// Classify the document at `documentUrl` with the custom classifier "my-classifier".
const initialResponse = await client
  .path("/documentClassifiers/{classifierId}:analyze", "my-classifier")
  .post({
    contentType: "application/json",
    body: { urlSource: documentUrl },
    queryParameters: { split: "auto" },
  });

// Stop here if the service rejected the request.
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// Poll the long-running classification until it completes. The body is cast
// to AnalyzeOperationOutput, as in the other analyze snippets, so that
// `analyzeResult` can be read from it.
const poller = getLongRunningPoller(client, initialResponse);
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;
console.log("Classification:", result.analyzeResult?.documents);

Get Service Info

// Query resource-level information from the service.
const response = await client.path("/info").get();

// Stop here if the service rejected the request.
if (isUnexpected(response)) {
  throw response.body.error;
}

const { customDocumentModels } = response.body;
console.log("Custom model limit:", customDocumentModels.limit);
console.log("Custom model count:", customDocumentModels.count);

Polling Pattern

import DocumentIntelligence, { isUnexpected, getLongRunningPoller, AnalyzeOperationOutput } from "@azure-rest/ai-document-intelligence";

// 1. Start operation
const initialResponse = await client
  .path("/documentModels/{modelId}:analyze", "prebuilt-layout")
  .post({
    contentType: "application/json",
    body: { urlSource },
  });

// 2. Check for errors
if (isUnexpected(initialResponse)) {
  throw initialResponse.body.error;
}

// 3. Create poller
const poller = getLongRunningPoller(client, initialResponse);

// 4. Optional: Monitor progress
poller.onProgress((state) => {
  console.log("Status:", state.status);
});

// 5. Wait for completion
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;

Key Types

import DocumentIntelligence, { isUnexpected, getLongRunningPoller, paginate, parseResultIdFromResponse, AnalyzeOperationOutput, DocumentClassifierBuildOperationDetailsOutput } from "@azure-rest/ai-document-intelligence";

Best Practices

Use getLongRunningPoller() - Document analysis is asynchronous, so always poll for results
Check isUnexpected() - Type guard for proper error handling
Choose the right model - Use prebuilt models when possible, and custom models for specialized docs
Handle confidence scores - Fields have confidence values; set thresholds for your use case
Use pagination - Use the paginate() helper for listing models
Prefer neural mode - For custom models, neural handles more variation than template

azure-ai-document-intelligence-ts

Safety Notice

Copy this and send it to your AI assistant to learn

Source Transparency

Related Skills

github-issue-creator

azure-observability

azure-appconfiguration-java

copilot-sdk