US W9 OCR

Automatically extract data from US W9 forms.

Mindee’s US W9 OCR API uses deep learning to automatically, accurately, and instantaneously parse the details of your W9 forms. In under a second, the API extracts a set of data from your PDFs or photos of W9 forms, including:

  • Name
  • SSN
  • Address
  • City State Zip
  • Business Name
  • EIN
  • Tax Classification
  • Tax Classification Other Details
  • W9 Revision Date
  • Signature Position
  • Signature Date Position
  • Tax Classification LLC

The US W9 OCR API supports both the new and old formats of the W9 form used in the United States. Tax forms from other countries are not supported with this model.

Set up the API

📘

Before making any API calls, you need to have created your API key.

  1. You'll need a US W9 form. You can use one of the sample documents provided below.
  1. Access your US W9 API by clicking on the US W9 card in the Document Catalog
  1. From the left navigation, go to documentation > API Reference. There you'll find sample code in popular languages and for the command line.
# Send a document to the US W9 prediction endpoint as a multipart form upload.
# Each trailing backslash continues the command on the next line, so the
# continuation lines must follow one another without blank lines in between.
curl -X POST \
  https://api.mindee.net/v1/products/mindee/us_w9/v1/predict \
  -H 'Authorization: Token my-api-key-here' \
  -H 'content-type: multipart/form-data' \
  -F document=@/path/to/your/file.png
import json

import requests


# Credentials and endpoint coordinates used to build the prediction URL.
api_key = "my-api-key-here"
account = "mindee"
endpoint = "us_w9"
version = "1.0"

url = f"https://api.mindee.net/v1/products/{account}/{endpoint}/v{version}/predict"

# Upload the file as the multipart form field "document"; the context manager
# closes the file once the request has returned.
with open("/path/to/the/file.ext", "rb") as document_file:
    response = requests.post(
        url,
        files={"document": document_file},
        headers={"Authorization": f"Token {api_key}"},
    )

payload = response.json()

# When the request was not successful, raise with the error object from the response body.
if not response.ok:
    raise RuntimeError(payload["api_request"]["error"])

# Pretty-print the "document" part of the response.
print(json.dumps(payload["document"], indent=2))
// works for NODE > v10

const axios = require("axios");

const fs = require("fs");

const FormData = require("form-data");


// Credentials and endpoint coordinates used to build the prediction URL.
const apiKey = "my-api-key-here";
const account = "mindee";
const endpoint = "us_w9";
const version = "1.0";

/**
 * Uploads a file to the US W9 prediction endpoint and logs the response body.
 * If the request fails, the error is logged and the process exits with code 1.
 *
 * @param {string} filePath - Path of the document to upload.
 * @returns {Promise<void>}
 */
async function makeRequest(filePath) {
  const form = new FormData();
  form.append("document", fs.createReadStream(filePath));

  const requestConfig = {
    method: "POST",
    url: `https://api.mindee.net/v1/products/${account}/${endpoint}/v${version}/predict`,
    // The token header is combined with the headers supplied by the form object.
    headers: { Authorization: `Token ${apiKey}`, ...form.getHeaders() },
    data: form,
  };

  try {
    const { data: body } = await axios(requestConfig);
    console.log(body);
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
}

makeRequest("/path/to/the/file.ext");
require 'uri'

require 'net/http'

require 'net/https'


# Credentials and endpoint coordinates used to build the prediction URL.
api_key = "my-api-key-here"
account = "mindee"
endpoint = "us_w9"
version = "1.0"

url = URI("https://api.mindee.net/v1/products/#{account}/#{endpoint}/v#{version}/predict")
file = '/path/to/the/file.ext'

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request['Authorization'] = "Token #{api_key}"

# Open the document in binary mode and in block form, so the file handle is
# closed as soon as the request has been sent instead of staying open.
response = File.open(file, 'rb') do |document|
  request.set_form([['document', document]], 'multipart/form-data')
  http.request(request)
end

# Abort with the HTTP status message for any response that is not a success.
raise response.msg unless response.is_a?(Net::HTTPSuccess)

puts response.read_body
<form onsubmit="mindeeSubmit(event)" >
    <input type="file" id="my-file-input" name="file" />
    <input type="submit" />
</form>

<script type="text/javascript">
    // Form submit handler: uploads the selected file to the US W9 prediction
    // endpoint and logs the raw response text to the console.
    const mindeeSubmit = (evt) => {
        // Handle the upload here instead of letting the browser submit the form.
        evt.preventDefault();

        const myFileInput = document.getElementById('my-file-input');
        const myFile = myFileInput.files[0];
        // Nothing to send when no file has been selected.
        if (!myFile) { return; }

        const data = new FormData();
        data.append("document", myFile, myFile.name);

        const xhr = new XMLHttpRequest();

        xhr.addEventListener("readystatechange", function () {
            // readyState 4 means the request has finished.
            if (this.readyState === 4) {
                console.log(this.responseText);
            }
        });

        // Public production endpoint, the same host used by the other samples on this page.
        xhr.open("POST", "https://api.mindee.net/v1/products/mindee/us_w9/v1/predict");
        xhr.setRequestHeader("Authorization", "Token my-api-key-here");
        xhr.send(data);
    }
</script>
using Mindee;

using Mindee.Input;

using Mindee.Http;

using Mindee.Parsing;


string apiKey = "my-api-key-here";
string filePath = "/path/to/the/file.ext";

// Create the client that issues the API calls
var mindeeClient = new MindeeClient(apiKey);

// Wrap the local file path as an input source
// (other input types can be used, as mentioned in the docs)
LocalInputSource inputSource = new LocalInputSource(filePath);

// Describe the endpoint to call: endpoint name, account name and version
var myEndpoint = new CustomEndpoint(
    endpointName: "us_w9",
    accountName: "mindee",
    version: "1.0"
);

var response = await mindeeClient.ParseAsync(inputSource, myEndpoint);

// Print a summary of the predictions
System.Console.WriteLine(response.Document.ToString());

// Print the document-level predictions
// System.Console.WriteLine(response.Document.Inference.Prediction.ToString());
import com.mindee.MindeeClient;

import com.mindee.input.LocalInputSource;

import com.mindee.parsing.common.PredictResponse;

import com.mindee.product.custom.CustomV1;

import com.mindee.http.Endpoint;

import java.io.File;

import java.io.IOException;


/**
 * Minimal example that sends a local file to the US W9 endpoint and prints the response.
 */
public class SimpleMindeeClient {

  /**
   * Parses the file at a hard-coded path with the US W9 endpoint and prints the response.
   *
   * @param args command-line arguments (not used)
   * @throws IOException declared by the sample; presumably raised while reading the file
   */
  public static void main(String[] args) throws IOException {
    final String apiKey = "my-api-key-here";
    final String filePath = "/path/to/the/file.ext";

    // Client configured with the API key
    final MindeeClient client = new MindeeClient(apiKey);

    // Document to send, loaded from disk
    final File documentFile = new File(filePath);
    final LocalInputSource document = new LocalInputSource(documentFile);

    // Endpoint configuration
    final Endpoint w9Endpoint = new Endpoint("us_w9", "mindee", "1.0");

    // Send the document for parsing
    final PredictResponse<CustomV1> response = client.parse(document, w9Endpoint);

    // Print a summary of the response
    System.out.println(response.toString());

    // Print a summary of the predictions
    // System.out.println(response.getDocument().toString());

    // Print the document-level predictions
    // System.out.println(response.getDocument().getInference().getPrediction().toString());

    // Print the page-level predictions
    // response.getDocument().getInference().getPages().forEach(
    //     page -> System.out.println(page.toString())
    // );
  }
}
<?php

// cURL install check
if (!function_exists('curl_init')) {
    exit("cURL isn't installed for " . phpversion());
}

$API_KEY = 'my-api-key-here';
$FILE_PATH = '/path/to/my/file.pdf';
$MIME_TYPE = 'application/pdf'; // change according to the file type
$ACCOUNT = 'mindee';
$VERSION = '1.0';
$ENDPOINT = 'us_w9';

// Open a cURL session to send the document
$ch = curl_init();

// Setup headers
$headers = array(
  "Authorization: Token $API_KEY"
);

// Add our file to the request.
// basename() gives the upload name and also copes with a path that
// contains no directory separator (e.g. a file in the current directory).
$data = array(
  "document" => new CURLFile(
      $FILE_PATH,
      $MIME_TYPE,
      basename($FILE_PATH)
  )
);

// URL for a prediction
$url = "https://api.mindee.net/v1/products/$ACCOUNT/$ENDPOINT/v$VERSION/predict";

$options = array(
  CURLOPT_URL => $url,
  CURLOPT_HTTPHEADER => $headers,
  CURLOPT_POSTFIELDS => $data,
  CURLOPT_FOLLOWLOCATION => true,
  // Return the response body from curl_exec() instead of printing it directly
  CURLOPT_RETURNTRANSFER => true
);

// Set all options for the cURL request
curl_setopt_array(
    $ch,
    $options
);

// Execute the request & extract the query content into a variable
$json = curl_exec($ch);

// curl_exec() returns false when the transfer itself failed; report the
// reason instead of decoding and printing a meaningless "null".
if ($json === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit("cURL request failed: " . $error);
}

// Close the cURL session
curl_close($ch);

// Store the response as an array to allow for easier manipulations
$result = json_decode($json, true);

// Print the content of the document as raw json
echo json_encode($result, JSON_PRETTY_PRINT);
  • Replace my-api-key-here with your new API key, or use the "select an API key" feature and it will be filled automatically.
  • Copy and paste the sample code of your desired choice in your application, code environment or terminal.
  • Replace /path/to/my/file with the path to your W9 form.

❗️

Always remember to replace your API key!

  1. Run your code. You will receive a JSON response with the W9 form details.

API Response

Here is the full JSON response you get when you call the API:

{
  "api_request": {
    "error": {},
    "resources": [
      "document"
    ],
    "status": "success",
    "status_code": 201,
    "url": "https://api.mindee.net/v1/products/mindee/us_w9/v1/predict"
  },
  "document": {
    "id": "e8f24830-30e7-4e79-b411-8c253b627482",
    "name": "sample_w9.jpg",
    "n_pages": 1,
    "is_rotation_applied": true,
    "inference": {
      "started_at": "2023,-05-06T16:37:28",
      "finished_at": "2023-05-06T16:37:29",
      "processing_time": 0.955,
      "pages": [
        {
          "id": 0,
          "orientation": {"value": 0},
          "prediction": { .. },
          "extras": {}
        }
      ],
      "prediction": { .. },
      "extras": {}
    }
  }
}

You can find the prediction within the prediction key found in two locations:

  • In document > inference > prediction for document-level predictions: it contains the different fields extracted at the document level, meaning that for multi-page PDFs, we reconstruct a single W9 object using all the pages.
  • In document > inference > pages[ ] > prediction for page-level predictions: it gives the prediction for each page independently. With images, there is only one element on this array, but with PDFs, you can find the extracted data for each PDF page.

Each predicted field may contain one or several values:

  • a confidence score
  • a polygon highlighting the information location
  • a page_id where the information was found (document level only)
{
    "created_at": "2023-06-23 17:54:16.593466",
    "finished_at": "2023-06-23 17:54:32.144330",
    "inference_id": "b5cf4c18-5fa1-4aa0-b768-5c05c09f8e06",
    "is_rotation_applied": true,
    "n_documents": 1,
    "n_pages": 1,
    "processing_time": 15.550875902175903,
    "product": {
        "hash": "us-w9",
        "version": "1.0"
    },
    "results": [
        {
            "document": {
                "inference": {
                    "extras": {
                        "orientation": {
                            "pages": [
                                {
                                    "value": 0
                                }
                            ]
                        }
                    },
                    "pages": [],
                    "prediction": {
                        "address": {
                            "confidence": 0.97,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.137,
                                    0.265
                                ],
                                [
                                    0.322,
                                    0.265
                                ],
                                [
                                    0.322,
                                    0.283
                                ],
                                [
                                    0.137,
                                    0.283
                                ]
                            ],
                            "value": "Somewhere In Milky Way"
                        },
                        "business_name": {
                            "confidence": 0.0,
                            "page_id": 0,
                            "polygon": [],
                            "value": null
                        },
                        "city_state_zip": {
                            "confidence": 0.96,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.129,
                                    0.297
                                ],
                                [
                                    0.436,
                                    0.297
                                ],
                                [
                                    0.436,
                                    0.314
                                ],
                                [
                                    0.129,
                                    0.314
                                ]
                            ],
                            "value": "Probably Still At Cambridge P O Box CB1"
                        },
                        "ein": {
                            "confidence": 0.93,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.677,
                                    0.437
                                ],
                                [
                                    0.909,
                                    0.437
                                ],
                                [
                                    0.909,
                                    0.464
                                ],
                                [
                                    0.677,
                                    0.464
                                ]
                            ],
                            "value": "942203664"
                        },
                        "name": {
                            "confidence": 0.98,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.122,
                                    0.116
                                ],
                                [
                                    0.278,
                                    0.116
                                ],
                                [
                                    0.278,
                                    0.133
                                ],
                                [
                                    0.122,
                                    0.133
                                ]
                            ],
                            "value": "Stephen W Hawking"
                        },
                        "signature_date_position": {
                            "page_id": 0,
                            "polygon": []
                        },
                        "signature_position": {
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.223,
                                    0.642
                                ],
                                [
                                    0.369,
                                    0.642
                                ],
                                [
                                    0.369,
                                    0.682
                                ],
                                [
                                    0.223,
                                    0.682
                                ]
                            ]
                        },
                        "ssn": {
                            "confidence": 0.96,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.685,
                                    0.375
                                ],
                                [
                                    0.934,
                                    0.375
                                ],
                                [
                                    0.934,
                                    0.407
                                ],
                                [
                                    0.685,
                                    0.407
                                ]
                            ],
                            "value": "560758145"
                        },
                        "tax_classification": {
                            "confidence": 1.0,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.116,
                                    0.189
                                ],
                                [
                                    0.118,
                                    0.189
                                ],
                                [
                                    0.118,
                                    0.192
                                ],
                                [
                                    0.116,
                                    0.192
                                ]
                            ],
                            "value": "individual"
                        },
                        "tax_classification_llc": {
                            "confidence": 0.0,
                            "page_id": null,
                            "polygon": [],
                            "value": null
                        },
                        "w9_revision_date": {
                            "confidence": 0.64,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.085,
                                    0.071
                                ],
                                [
                                    0.155,
                                    0.071
                                ],
                                [
                                    0.155,
                                    0.085
                                ],
                                [
                                    0.085,
                                    0.085
                                ]
                            ],
                            "value": "august 2013"
                        }
                    }
                },
                "n_pages": 1,
                "name": "2013.jpg"
            }
        }
    ],
    "status": "success"
}

Extracted US W9 data

Depending on the field type specified, additional attributes can be extracted from the US W9 object. Using the above W9 example, the following are the basic fields that can be extracted.

Name

  • name: The Name as it is shown on the applicant's income tax return.
{
  "name": {
                            "confidence": 0.98,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.122,
                                    0.116
                                ],
                                [
                                    0.278,
                                    0.116
                                ],
                                [
                                    0.278,
                                    0.133
                                ],
                                [
                                    0.122,
                                    0.133
                                ]
                            ],
                            "value": "Stephen W Hawking"
                        }
}

SSN

  • ssn: Social Security Number of the applicant.
{
  "ssn": {
                            "confidence": 0.96,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.685,
                                    0.375
                                ],
                                [
                                    0.934,
                                    0.375
                                ],
                                [
                                    0.934,
                                    0.407
                                ],
                                [
                                    0.685,
                                    0.407
                                ]
                            ],
                            "value": "560758145"
                        }
}

Address

  • address: This includes the street address (number, street, and apt. or suite number) of the applicant.
{
  "address": {
                            "confidence": 0.97,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.137,
                                    0.265
                                ],
                                [
                                    0.322,
                                    0.265
                                ],
                                [
                                    0.322,
                                    0.283
                                ],
                                [
                                    0.137,
                                    0.283
                                ]
                            ],
                            "value": "Somewhere In Milky Way"
                        }
}

City State Zip

  • city_state_zip: This field includes multiple values like the city, state and corresponding zip code of the applicant.
{
  "city_state_zip": {
                            "confidence": 0.96,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.129,
                                    0.297
                                ],
                                [
                                    0.436,
                                    0.297
                                ],
                                [
                                    0.436,
                                    0.314
                                ],
                                [
                                    0.129,
                                    0.314
                                ]
                            ],
                            "value": "Probably Still At Cambridge P O Box CB1"
                        }
}

Business Name

  • business_name: The business name or disregarded entity name of the applicant, if it is different from the name above; otherwise this field is empty.
{
  "business_name": {
                            "confidence": 0.0,
                            "page_id": 0,
                            "polygon": [],
                            "value": null
                        }
}

EIN

  • ein: This field outputs the Employer Identification Number.
{
  "ein": {
                            "confidence": 0.93,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.677,
                                    0.437
                                ],
                                [
                                    0.909,
                                    0.437
                                ],
                                [
                                    0.909,
                                    0.464
                                ],
                                [
                                    0.677,
                                    0.464
                                ]
                            ],
                            "value": "942203664"
                        }
}

Tax Classification

  • tax_classification: The federal tax classification, which can vary depending on the revision date.
{
  "tax_classification": {
                            "confidence": 1.0,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.116,
                                    0.189
                                ],
                                [
                                    0.118,
                                    0.189
                                ],
                                [
                                    0.118,
                                    0.192
                                ],
                                [
                                    0.116,
                                    0.192
                                ]
                            ],
                            "value": "individual"
                        }
}

Tax Classification Other Details

  • tax_classification_other_details: This field includes other details on the tax classification that were not mentioned before.
{
  "tax_classification_other_details": {
                            "confidence": 0.0,
                            "page_id": null,
                            "polygon": [],
                            "value": null
                        }
}

W9 Revision Date

  • w9_revision_date: The Revision month and year of the W9 form.
{
  "w9_revision_date": {
                            "confidence": 0.64,
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.085,
                                    0.071
                                ],
                                [
                                    0.155,
                                    0.071
                                ],
                                [
                                    0.155,
                                    0.085
                                ],
                                [
                                    0.085,
                                    0.085
                                ]
                            ],
                            "value": "august 2013"
                        }
}

Signature Position

  • signature_position: Position of the signature on the document. The signature itself is not part of the field.
{
  "signature_position": {
                            "page_id": 0,
                            "polygon": [
                                [
                                    0.223,
                                    0.642
                                ],
                                [
                                    0.369,
                                    0.642
                                ],
                                [
                                    0.369,
                                    0.682
                                ],
                                [
                                    0.223,
                                    0.682
                                ]
                            ]
                        }
}

Signature Date Position

  • signature_date_position: Position of the signature date on the document. The date itself is not part of the field.
{
  "signature_date_position": {
                            "page_id": 0,
                            "polygon": []
                        }
}

Tax Classification LLC

  • tax_classification_llc: Depending on revision year the different values can be S, C, P or D for Limited Liability Company Classification.
{
  "tax_classification_llc": {
                            "confidence": 0.0,
                            "page_id": null,
                            "polygon": [],
                            "value": null
                        }
}

Questions?
Slack Logo Icon  Join our Slack