International ID OCR

Automatically extract data from Identity documents.

Mindee’s International ID API uses deep learning to automatically, accurately, and instantaneously parse your documents details. In a few seconds, the API extracts a set of data from your PDFs or photos of ID’s from all locations, including:

  • Document Type
  • Document Number
  • Surnames
  • Given Names
  • Sex
  • Birth Date
  • Birth Place
  • Nationality
  • Personal Number
  • Country of Issue
  • State of Issue
  • Issue Date
  • Expiration Date
  • Address
  • MRZ Line 1
  • MRZ Line 2
  • MRZ Line 3

Set up the API

📘

Before making any API calls, you need to have created your API key.

  1. You'll need an ID Card or a passport or any other identification document. You can use one of the sample documents provided below.


  1. Access your document by clicking on the corresponding product card in the Document Catalog

  1. From the left navigation, go to documentation > API Reference, you'll find sample code in popular languages and command line.
from mindee import Client, product, AsyncPredictResponse # Init a new client mindee_client = Client(api_key="my-api-key-here") # Load a file from disk input_doc = mindee_client.source_from_path("/path/to/the/file.ext") # Load a file from disk and enqueue it. result: AsyncPredictResponse = mindee_client.enqueue_and_parse( product.InternationalIdV2, input_doc, ) # Print a brief summary of the parsed data print(result.document)
const mindee = require("mindee"); // for TS or modules: // import * as mindee from "mindee"; // Init a new client const mindeeClient = new mindee.Client({ apiKey: "my-api-key-here" }); // Load a file from disk const inputSource = mindeeClient.docFromPath("/path/to/the/file.ext"); // Parse the file const apiResponse = mindeeClient.enqueueAndParse( mindee.product.InternationalIdV2, inputSource ); // Handle the response Promise apiResponse.then((resp) => { // print a string summary console.log(resp.document.toString()); });
using Mindee; using Mindee.Input; using Mindee.Product.InternationalId; string apiKey = "my-api-key-here"; string filePath = "/path/to/the/file.ext"; // Construct a new client MindeeClient mindeeClient = new MindeeClient(apiKey); // Load an input source as a path string // Other input types can be used, as mentioned in the docs var inputSource = new LocalInputSource(filePath); // Call the product asynchronously with auto-polling var response = await mindeeClient .EnqueueAndParseAsync<InternationalIdV2>(inputSource); // Print a summary of all the predictions System.Console.WriteLine(response.Document.ToString()); // Print only the document-level predictions // System.Console.WriteLine(response.Document.Inference.Prediction.ToString());
# # Install the Ruby client library by running: # gem install mindee # require 'mindee' # Init a new client mindee_client = Mindee::Client.new(api_key: 'my-api-key') # Load a file from disk input_source = mindee_client.source_from_path('/path/to/the/file.ext') # Parse the file result = mindee_client.parse( input_source, Mindee::Product::InternationalId::InternationalIdV2 ) # Print a full summary of the parsed data in RST format puts result.document # Print the document-level parsed data # puts result.document.inference.prediction
import com.mindee.MindeeClient; import com.mindee.input.LocalInputSource; import com.mindee.parsing.common.AsyncPredictResponse; import com.mindee.product.internationalid.InternationalIdV2; import java.io.File; import java.io.IOException; public class SimpleMindeeClient { public static void main(String[] args) throws IOException, InterruptedException { String apiKey = "my-api-key-here"; String filePath = "/path/to/the/file.ext"; // Init a new client MindeeClient mindeeClient = new MindeeClient(apiKey); // Load a file from disk LocalInputSource inputSource = new LocalInputSource(new File(filePath)); // Parse the file asynchronously AsyncPredictResponse<InternationalIdV2> response = mindeeClient.enqueueAndParse( InternationalIdV2.class, inputSource ); // Print a summary of the response System.out.println(response.toString()); // Print a summary of the predictions // System.out.println(response.getDocumentObj().toString()); // Print the document-level predictions // System.out.println(response.getDocumentObj().getInference().getPrediction().toString()); // Print the page-level predictions // response.getDocumentObj().getInference().getPages().forEach( // page -> System.out.println(page.toString()) // ); } }
API_KEY='my-api-key-here' ACCOUNT='mindee' ENDPOINT='international_id' VERSION='2' FILE_PATH='/path/to/your/file.png' # Maximum amount of retries to get the result of a queue MAX_RETRIES=10 # Delay between requests DELAY=6 # Enqueue the document for async parsing QUEUE_RESULT=$(curl -sS --request POST \ -H "Authorization: Token $API_KEY" \ -H "Content-Type: multipart/form-data" \ -F "document=@$FILE_PATH" \ "https://api.mindee.net/v1/products/$ACCOUNT/$ENDPOINT/v$VERSION/predict_async") # Status code sent back from the server STATUS_CODE=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']status_code[\"|']:[\s][\"|']*[a-zA-Z0-9-]*" | rev | cut --complement -f2- -d" " | rev) # Check that the document was properly queued if [ -z "$STATUS_CODE" ] || [ "$STATUS_CODE" -gt 399 ] || [ "$STATUS_CODE" -lt 200 ] then if [ -z "$STATUS_CODE" ] then echo "Request couldn't be processed." exit 1 fi echo "Error $STATUS_CODE was returned by API during enqueuing. " # Print the additional details, if there are any: ERROR=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']error[\"|']:[\s]\{[^\}]*" | rev | cut --complement -f2- -d"{" | rev) if [ -z "$ERROR" ] then exit 1 fi # Details on the potential error: ERROR_CODE=$(echo "$ERROR" | grep -oP "[\"|']code[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev) MESSAGE=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']message[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev) DETAILS=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']details[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev) echo "This was the given explanation:" echo "-------------------------" echo "Error Code: $ERROR_CODE" echo "Message: $MESSAGE" echo "Details: $DETAILS" echo "-------------------------" exit 1 else echo "File sent, starting to retrieve from server..." # Get the document's queue ID QUEUE_ID=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']id[\"|']:[\s][\"|'][a-zA-Z0-9-]*" | rev | cut --complement -f2- -d"\"" | rev) # Amount of attempts to retrieve the parsed document were made TIMES_TRIED=1 # Try to fetch the file until we get it, or until we hit the maximum amount of retries while [ "$TIMES_TRIED" -lt "$MAX_RETRIES" ] do # Wait for a bit at each step sleep $DELAY # Note: we use -L here because the location of the file might be behind a redirection PARSED_RESULT=$(curl -sS -L \ -H "Authorization: Token $API_KEY" \ "https://api.mindee.net/v1/products/$ACCOUNT/$ENDPOINT/v$VERSION/documents/queue/$QUEUE_ID") # Isolating the job (queue) & the status to monitor the document JOB=$(echo "$PARSED_RESULT" | grep -ioP "[\"|']job[\"|']:[\s]\{[^\}]*" | rev | cut --complement -f2- -d"{" | rev) QUEUE_STATUS=$(echo "$JOB" | grep -ioP "[\"|']status[\"|']:[\s][\"|'][a-zA-Z0-9-]*" | rev | cut --complement -f2- -d"\"" | rev) if [ "$QUEUE_STATUS" = "completed" ] then # Print the result echo "$PARSED_RESULT" # Optional: isolate the document: # DOCUMENT=$(echo "$PARSED_RESULT" | grep -ioP "[\"|']document[\"|']:[\s].*([\"|']job[\"|'])" | rev | cut -f2- -d"," | rev) # echo "{$DOCUMENT}" # Remark: on compatible shells, fields can also be extracted through the use of tools like jq: # DOCUMENT=$(echo "$PARSED_RESULT" | jq '.["document"]') exit 0 fi TIMES_TRIED=$((TIMES_TRIED+1)) done fi echo "Operation aborted, document not retrieved after $TIMES_TRIED tries" exit 1
<?php use Mindee\Client; use Mindee\Product\InternationalId\InternationalIdV2; // Init a new client $mindeeClient = new Client("my-api-key-here"); // Load a file from disk $inputSource = $mindeeClient->sourceFromPath("/path/to/the/file.ext"); // Parse the file asynchronously $apiResponse = $mindeeClient->enqueueAndParse(InternationalIdV2::class, $inputSource); echo $apiResponse->document;

  • Replace my-api-key-here with your new API key, or use the "select an API key" feature and it will be filled automatically.
  • Copy and paste the sample code of your desired choice in your application, code environment or terminal.
  • Replace /path/to/my/file with the path to your document.

❗️

Always remember to replace your API key!

  1. Run your code. You will receive a JSON response with the Id document details.

API Response

Here is the full JSON response you get when you call the API:

{ "api_request": { "error": {}, "resources": [ "document", "job" ], "status": "success", "status_code": 200, "url": "https://api.mindee.net/v1/products/mindee/international_id/v2/documents/65c0af55-2247-45a4-bdea-e0908bca9df9" }, "document": { "id": "65c0af55-2247-45a4-bdea-e0908bca9df9", "inference": { "extras": {}, "finished_at": "2024-10-25T13:42:20.489000", "is_rotation_applied": true, "pages": [ { "extras": {}, "id": 0, "orientation": { "value": 0 }, "prediction": {} } ], "prediction": {...}, "processing_time": 2.647, "product": { "features": [ "document_type", "document_number", "surnames", "given_names", "sex", "birth_date", "birth_place", "nationality", "personal_number", "country_of_issue", "state_of_issue", "issue_date", "expiry_date", "address", "mrz_line1", "mrz_line2", "mrz_line3" ], "name": "mindee/international_id", "type": "standard", "version": "2.1" }, "started_at": "2024-10-25T13:42:17.554000" }, "n_pages": 1, "name": "michelle_obama.jpg" }, "job": { "available_at": "2024-10-25T13:42:20.499000", "error": {}, "id": "c1e1cf27-f4a3-43cb-8bf0-0651a57756eb", "issued_at": "2024-10-25T13:42:17.554000", "status": "completed" } }

You can find the prediction within the prediction key found in  document > inference > prediction for document-level predictions: it contains the different fields extracted at the document level, meaning that for multi-pages PDFs, we reconstruct a single receipt object using all the pages.

{ "document": { ... "prediction": { "address": { "value": null }, "birth_date": { "value": "1964-01-17" }, "birth_place": { "value": "ILLINOIS, U.S.A." }, "country_of_issue": { "value": "USA" }, "document_number": { "value": "910239248" }, "document_type": { "value": "PASSPORT" }, "expiry_date": { "value": "2018-12-05" }, "given_names": [ { "value": "MICHELLE" } ], "issue_date": { "value": "2013-12-06" }, "mrz_line1": { "value": "P<USAOBAMA<<MICHELLE<<<<<<<<<<<<<<<<<<<<<<<<" }, "mrz_line2": { "value": "9102392482USA6401171F1812051900781200<129676" }, "mrz_line3": { "value": null }, "nationality": { "value": "USA" }, "personal_number": { "value": null }, "sex": { "value": "F" }, "state_of_issue": { "value": null }, "surnames": [ { "value": "OBAMA" } ] }, ... }

Extracted data

Using the above Bill of Lading example the following are the basic fields that can be extracted.

Document Type

  • document_type: The type of personal identification document. Classification among the following values: IDENTIFICATION_CARD, PASSPORT, DRIVER_LICENCE, VISA, RESIDENCY_CARD, VOTER_REGISTRATION
{ "document_type": { "value": "PASSPORT" } }

Document Number

  • document_number: The unique identifier assigned to the document.
{ "document_number": { "value": "910239248" } }

Surnames

  • surnames: The list of document holder’s family names.
{ "surnames": [ { "value": "OBAMA" } ] }

Given Names

  • given-names: The list of the document holder’s given names.
{ "given_names": [ { "value": "MICHELLE" } ] }

Sex

  • sex: The biological sex of the document holder.
{ "sex": { "value": "F" } }

Birth Date

  • birth_date: The date of birth of the document holder.
{ "birth_date": { "value": "1964-01-17" } }

Birth Place

  • birth_place: The place of birth of the document holder.
{ "birth_place": { "value": "ILLINOIS, U.S.A." } }

Nationality

  • nationality: The country of citizenship of the document holder.
{ "nationality": { "value": "USA" } }

Personal Number

  • personal_number: The unique identifier assigned to the document holder.
{ "personal_number": { "value": null } }

Country of Issue

  • country_of_issue: The country where the document was issued.
{ "country_of_issue": { "value": "USA" } }

State of Issue

state_of_issue: The state or territory where the document was issued

{ "state_of_issue": { "value": Null } }

Issue Date

  • issue_date: The date when the document was issued.
{ "issue_date": { "value": "2013-12-06" } }

Expiration Date

  • expiration_date: The date when the document becomes invalid.
{ "expiration_date": { "value": "2018-12-05" } }

Address

  • address: The physical address of the document holder.
{ "address": { "value": null } }

MRZ

  • mrz_line1: the Machine Readable zone, first line.
{ "mrz_line1": { "value": "P<USAOBAMA<<MICHELLE<<<<<<<<<<<<<<<<<<<<<<<<" } }
  • mrz_line2: the Machine Readable zone, second line.
{ "mrz_line2": { "value": "9102392482USA6401171F1812051900781200<129676" } }
  • mrz_line3: the Machine Readable zone, third line.
{ "mrz_line3": { "value": null } }

Questions?
Slack Logo Icon  Join our Slack


Did this page help you?