International ID OCR
Automatically extract data from Identity documents.
Mindee’s International ID API uses deep learning to automatically, accurately, and instantaneously parse your documents details. In a few seconds, the API extracts a set of data from your PDFs or photos of ID’s from all locations, including:
- Document Type
- Document Number
- Surnames
- Given Names
- Sex
- Birth Date
- Birth Place
- Nationality
- Personal Number
- Country of Issue
- State of Issue
- Issue Date
- Expiration Date
- Address
- MRZ Line 1
- MRZ Line 2
- MRZ Line 3
Set up the API
Before making any API calls, you need to have created your API key.
- You'll need an ID Card or a passport or any other identification document. You can use one of the sample documents provided below.
- Access your document by clicking on the corresponding product card in the Document Catalog
- From the left navigation, go to documentation > API Reference, you'll find sample code in popular languages and command line.
from mindee import Client, product, AsyncPredictResponse
# Init a new client
mindee_client = Client(api_key="my-api-key-here")
# Load a file from disk
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")
# Load a file from disk and enqueue it.
result: AsyncPredictResponse = mindee_client.enqueue_and_parse(
product.InternationalIdV2,
input_doc,
)
# Print a brief summary of the parsed data
print(result.document)
const mindee = require("mindee");
// for TS or modules:
// import * as mindee from "mindee";
// Init a new client
const mindeeClient = new mindee.Client({ apiKey: "my-api-key-here" });
// Load a file from disk
const inputSource = mindeeClient.docFromPath("/path/to/the/file.ext");
// Parse the file
const apiResponse = mindeeClient.enqueueAndParse(
mindee.product.InternationalIdV2,
inputSource
);
// Handle the response Promise
apiResponse.then((resp) => {
// print a string summary
console.log(resp.document.toString());
});
using Mindee;
using Mindee.Input;
using Mindee.Product.InternationalId;
string apiKey = "my-api-key-here";
string filePath = "/path/to/the/file.ext";
// Construct a new client
MindeeClient mindeeClient = new MindeeClient(apiKey);
// Load an input source as a path string
// Other input types can be used, as mentioned in the docs
var inputSource = new LocalInputSource(filePath);
// Call the product asynchronously with auto-polling
var response = await mindeeClient
.EnqueueAndParseAsync<InternationalIdV2>(inputSource);
// Print a summary of all the predictions
System.Console.WriteLine(response.Document.ToString());
// Print only the document-level predictions
// System.Console.WriteLine(response.Document.Inference.Prediction.ToString());
require 'mindee'
# Init a new client
mindee_client = Mindee::Client.new(api_key: 'my-api-key-here')
# Load a file from disk
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
# Parse the file
result = mindee_client.enqueue_and_parse(
input_source,
Mindee::Product::InternationalId::InternationalIdV2
)
# Print a full summary of the parsed data in RST format
puts result.document
# Print the document-level parsed data
# puts result.document.inference.prediction
API_KEY='my-api-key-here'
ACCOUNT='mindee'
ENDPOINT='international_id'
VERSION='2'
FILE_PATH='/path/to/your/file.png'
# Maximum amount of retries to get the result of a queue
MAX_RETRIES=10
# Delay between requests
DELAY=6
# Enqueue the document for async parsing
QUEUE_RESULT=$(curl -sS --request POST \
-H "Authorization: Token $API_KEY" \
-H "Content-Type: multipart/form-data" \
-F "document=@$FILE_PATH" \
"https://api.mindee.net/v1/products/$ACCOUNT/$ENDPOINT/v$VERSION/predict_async")
# Status code sent back from the server
STATUS_CODE=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']status_code[\"|']:[\s][\"|']*[a-zA-Z0-9-]*" | rev | cut --complement -f2- -d" " | rev)
# Check that the document was properly queued
if [ -z "$STATUS_CODE" ] || [ "$STATUS_CODE" -gt 399 ] || [ "$STATUS_CODE" -lt 200 ]
then
if [ -z "$STATUS_CODE" ]
then
echo "Request couldn't be processed."
exit 1
fi
echo "Error $STATUS_CODE was returned by API during enqueuing. "
# Print the additional details, if there are any:
ERROR=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']error[\"|']:[\s]\{[^\}]*" | rev | cut --complement -f2- -d"{" | rev)
if [ -z "$ERROR" ]
then
exit 1
fi
# Details on the potential error:
ERROR_CODE=$(echo "$ERROR" | grep -oP "[\"|']code[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev)
MESSAGE=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']message[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev)
DETAILS=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']details[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev)
echo "This was the given explanation:"
echo "-------------------------"
echo "Error Code: $ERROR_CODE"
echo "Message: $MESSAGE"
echo "Details: $DETAILS"
echo "-------------------------"
exit 1
else
echo "File sent, starting to retrieve from server..."
# Get the document's queue ID
QUEUE_ID=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']id[\"|']:[\s][\"|'][a-zA-Z0-9-]*" | rev | cut --complement -f2- -d"\"" | rev)
# Amount of attempts to retrieve the parsed document were made
TIMES_TRIED=1
# Try to fetch the file until we get it, or until we hit the maximum amount of retries
while [ "$TIMES_TRIED" -lt "$MAX_RETRIES" ]
do
# Wait for a bit at each step
sleep $DELAY
# Note: we use -L here because the location of the file might be behind a redirection
PARSED_RESULT=$(curl -sS -L \
-H "Authorization: Token $API_KEY" \
"https://api.mindee.net/v1/products/$ACCOUNT/$ENDPOINT/v$VERSION/documents/queue/$QUEUE_ID")
# Isolating the job (queue) & the status to monitor the document
JOB=$(echo "$PARSED_RESULT" | grep -ioP "[\"|']job[\"|']:[\s]\{[^\}]*" | rev | cut --complement -f2- -d"{" | rev)
QUEUE_STATUS=$(echo "$JOB" | grep -ioP "[\"|']status[\"|']:[\s][\"|'][a-zA-Z0-9-]*" | rev | cut --complement -f2- -d"\"" | rev)
if [ "$QUEUE_STATUS" = "completed" ]
then
# Print the result
echo "$PARSED_RESULT"
# Optional: isolate the document:
# DOCUMENT=$(echo "$PARSED_RESULT" | grep -ioP "[\"|']document[\"|']:[\s].*([\"|']job[\"|'])" | rev | cut -f2- -d"," | rev)
# echo "{$DOCUMENT}"
# Remark: on compatible shells, fields can also be extracted through the use of tools like jq:
# DOCUMENT=$(echo "$PARSED_RESULT" | jq '.["document"]')
exit 0
fi
TIMES_TRIED=$((TIMES_TRIED+1))
done
fi
echo "Operation aborted, document not retrieved after $TIMES_TRIED tries"
exit 1
<?php
use Mindee\Client;
use Mindee\Product\InternationalId\InternationalIdV2;
// Init a new client
$mindeeClient = new Client("my-api-key-here");
// Load a file from disk
$inputSource = $mindeeClient->sourceFromPath("/path/to/the/file.ext");
// Parse the file asynchronously
$apiResponse = $mindeeClient->enqueueAndParse(InternationalIdV2::class, $inputSource);
echo $apiResponse->document;
- Replace my-api-key-here with your new API key, or use the "select an API key" feature and it will be filled automatically.
- Copy and paste the sample code of your desired choice in your application, code environment or terminal.
- Replace
/path/to/my/file
with the path to your document.
Always remember to replace your API key!
- Run your code. You will receive a JSON response with the Id document details.
API Response
Here is the full JSON response you get when you call the API:
{
"api_request": {
"error": {},
"resources": [
"document",
"job"
],
"status": "success",
"status_code": 200,
"url": "https://api.mindee.net/v1/products/mindee/international_id/v2/documents/65c0af55-2247-45a4-bdea-e0908bca9df9"
},
"document": {
"id": "65c0af55-2247-45a4-bdea-e0908bca9df9",
"inference": {
"extras": {},
"finished_at": "2024-10-25T13:42:20.489000",
"is_rotation_applied": true,
"pages": [
{
"extras": {},
"id": 0,
"orientation": {
"value": 0
},
"prediction": {}
}
],
"prediction": {...},
"processing_time": 2.647,
"product": {
"features": [
"document_type",
"document_number",
"surnames",
"given_names",
"sex",
"birth_date",
"birth_place",
"nationality",
"personal_number",
"country_of_issue",
"state_of_issue",
"issue_date",
"expiry_date",
"address",
"mrz_line1",
"mrz_line2",
"mrz_line3"
],
"name": "mindee/international_id",
"type": "standard",
"version": "2.1"
},
"started_at": "2024-10-25T13:42:17.554000"
},
"n_pages": 1,
"name": "michelle_obama.jpg"
},
"job": {
"available_at": "2024-10-25T13:42:20.499000",
"error": {},
"id": "c1e1cf27-f4a3-43cb-8bf0-0651a57756eb",
"issued_at": "2024-10-25T13:42:17.554000",
"status": "completed"
}
}
You can find the prediction within the prediction
key found in document > inference > prediction
for document-level predictions: it contains the different fields extracted at the document level, meaning that for multi-pages PDFs, we reconstruct a single receipt object using all the pages.
{
"document": {
...
"prediction": {
"address": {
"value": null
},
"birth_date": {
"value": "1964-01-17"
},
"birth_place": {
"value": "ILLINOIS, U.S.A."
},
"country_of_issue": {
"value": "USA"
},
"document_number": {
"value": "910239248"
},
"document_type": {
"value": "PASSPORT"
},
"expiry_date": {
"value": "2018-12-05"
},
"given_names": [
{
"value": "MICHELLE"
}
],
"issue_date": {
"value": "2013-12-06"
},
"mrz_line1": {
"value": "P<USAOBAMA<<MICHELLE<<<<<<<<<<<<<<<<<<<<<<<<"
},
"mrz_line2": {
"value": "9102392482USA6401171F1812051900781200<129676"
},
"mrz_line3": {
"value": null
},
"nationality": {
"value": "USA"
},
"personal_number": {
"value": null
},
"sex": {
"value": "F"
},
"state_of_issue": {
"value": null
},
"surnames": [
{
"value": "OBAMA"
}
]
},
...
}
Extracted data
Using the above Bill of Lading example the following are the basic fields that can be extracted.
- Document Type
- Document Number
- Surnames
- Given Names
- Sex
- Birth Date
- Birth Place
- Nationality
- Personal Number
- Country of Issue
- State of Issue
- Issue Date
- Expiration Date
- Address
- MRZ
Document Type
- document_type: The type of personal identification document. Classification among the following values: IDENTIFICATION_CARD, PASSPORT, DRIVER_LICENCE, VISA, RESIDENCY_CARD, VOTER_REGISTRATION
{
"document_type": {
"value": "PASSPORT"
}
}
Document Number
- document_number: The unique identifier assigned to the document.
{
"document_number": {
"value": "910239248"
}
}
Surnames
- surnames: The list of document holder’s family names.
{
"surnames": [
{
"value": "OBAMA"
}
]
}
Given Names
- given-names: The list of the document holder’s given names.
{
"given_names": [
{
"value": "MICHELLE"
}
]
}
Sex
- sex: The biological sex of the document holder.
{
"sex": {
"value": "F"
}
}
Birth Date
- birth_date: The date of birth of the document holder.
{
"birth_date": {
"value": "1964-01-17"
}
}
Birth Place
- birth_place: The place of birth of the document holder.
{
"birth_place": {
"value": "ILLINOIS, U.S.A."
}
}
Nationality
- nationality: The country of citizenship of the document holder.
{
"nationality": {
"value": "USA"
}
}
Personal Number
- personal_number: The unique identifier assigned to the document holder.
{
"personal_number": {
"value": null
}
}
Country of Issue
- country_of_issue: The country where the document was issued.
{
"country_of_issue": {
"value": "USA"
}
}
State of Issue
state_of_issue: The state or territory where the document was issued
{
"state_of_issue": {
"value": Null
}
}
Issue Date
- issue_date: The date when the document was issued.
{
"issue_date": {
"value": "2013-12-06"
}
}
Expiration Date
- expiration_date: The date when the document becomes invalid.
{
"expiration_date": {
"value": "2018-12-05"
}
}
Address
- address: The physical address of the document holder.
{
"address": {
"value": null
}
}
MRZ
- mrz_line1: the Machine Readable zone, first line.
{
"mrz_line1": {
"value": "P<USAOBAMA<<MICHELLE<<<<<<<<<<<<<<<<<<<<<<<<"
}
}
- mrz_line2: the Machine Readable zone, second line.
{
"mrz_line2": {
"value": "9102392482USA6401171F1812051900781200<129676"
}
}
- mrz_line3: the Machine Readable zone, third line.
{
"mrz_line3": {
"value": null
}
}
Questions?
Join our Slack
Updated about 2 months ago