Resume OCR
Automatically extract data from resumes, motivation letters, and recommendation letters.
Mindee’s Resume API uses deep learning to automatically, accurately, and instantaneously parse your documents' details. In a few seconds, the API extracts a set of data from your PDFs or photos of resumes, motivation letters, and recommendation letters, including:
- Document Language
- Document Type
- Given Names
- Surnames
- Nationality
- Email Address
- Phone Number
- Address
- Social Networks
- Profession
- Job Applied
- Languages
- Hard Skills
- Soft Skills
- Education
- Professional Experiences
- Certificates
Set up the API
Before making any API calls, you need to have created your API key.
- You'll need a Resume. You can use one of the sample documents provided below.
- Access your Resume API by clicking on the corresponding product card in the Document Catalog
- From the left navigation, go to Documentation > API Reference. There you'll find sample code in popular languages and for the command line.
from mindee import Client, product, AsyncPredictResponse
# Create the Mindee client with your API key
client = Client(api_key="my-api-key-here")
# Build an input source from a file on disk
local_source = client.source_from_path("/path/to/the/file.ext")
# Enqueue the source for the Resume V1 product and parse it
response: AsyncPredictResponse = client.enqueue_and_parse(product.ResumeV1, local_source)
# Print a brief summary of the parsed data
parsed_document = response.document
print(parsed_document)
const mindee = require("mindee");
// for TS or modules:
// import * as mindee from "mindee";
// Create the Mindee client with your API key
const client = new mindee.Client({ apiKey: "my-api-key-here" });
// Build an input source from a file on disk
const localDoc = client.docFromPath("/path/to/the/file.ext");
// Enqueue the file for the Resume V1 product; the call returns a Promise
client
  .enqueueAndParse(mindee.product.ResumeV1, localDoc)
  .then(function (response) {
    // Once the Promise resolves, log the document's string summary
    const summary = response.document.toString();
    console.log(summary);
  });
using Mindee;
using Mindee.Input;
using Mindee.Product.Resume;
// API key and path of the document to send
var key = "my-api-key-here";
var documentPath = "/path/to/the/file.ext";
// Create the Mindee client
var client = new MindeeClient(key);
// Wrap the file path in a local input source
// (other input types exist; see the SDK docs)
LocalInputSource source = new LocalInputSource(documentPath);
// Send the document to the Resume V1 product asynchronously, with auto-polling
var parsed = await client.EnqueueAndParseAsync<ResumeV1>(source);
// Print a summary of all the predictions
string summary = parsed.Document.ToString();
System.Console.WriteLine(summary);
// Print only the document-level predictions
// System.Console.WriteLine(parsed.Document.Inference.Prediction.ToString());
require 'mindee'
# Create the Mindee client with your API key
client = Mindee::Client.new(api_key: 'my-api-key-here')
# Build an input source from a file on disk
local_source = client.source_from_path('/path/to/the/file.ext')
# Enqueue the file for the Resume V1 product and parse it
response = client.enqueue_and_parse(local_source, Mindee::Product::Resume::ResumeV1)
# Print a full summary of the parsed data in RST format
parsed_document = response.document
puts(parsed_document)
# Print the document-level parsed data
# puts response.document.inference.prediction
import com.mindee.MindeeClient;
import com.mindee.input.LocalInputSource;
import com.mindee.parsing.common.AsyncPredictResponse;
import com.mindee.product.resume.ResumeV1;
import java.io.File;
import java.io.IOException;
/**
 * Minimal example: sends a local file to the Resume V1 product and prints the response.
 */
public class SimpleMindeeClient {
  // API key and path of the document to send
  private static final String API_KEY = "my-api-key-here";
  private static final String FILE_PATH = "/path/to/the/file.ext";

  public static void main(String[] args) throws IOException, InterruptedException {
    // Create the Mindee client
    MindeeClient client = new MindeeClient(API_KEY);

    // Wrap the file on disk in a local input source
    File documentFile = new File(FILE_PATH);
    LocalInputSource source = new LocalInputSource(documentFile);

    // Enqueue the file for the Resume V1 product and parse it asynchronously
    AsyncPredictResponse<ResumeV1> parsed = client.enqueueAndParse(ResumeV1.class, source);

    // Print a summary of the response
    String summary = parsed.toString();
    System.out.println(summary);

    // Print a summary of the predictions
    // System.out.println(parsed.getDocumentObj().toString());

    // Print the document-level predictions
    // System.out.println(parsed.getDocumentObj().getInference().getPrediction().toString());

    // Print the page-level predictions
    // parsed.getDocumentObj().getInference().getPages().forEach(
    //   page -> System.out.println(page.toString())
    // );
  }
}
# Enqueue a document on the Resume API, then poll the queue until the parsed
# result is available or the maximum number of attempts has been reached.
#
# Note: `grep -P` and `cut --complement` are GNU options; this script needs
# GNU grep and GNU coreutils.
# Throughout, `rev | cut --complement -f2- -d"X" | rev` keeps only the text
# that follows the last occurrence of delimiter X in the grep match.

# Credentials and endpoint coordinates used to build the request URLs
API_KEY='my-api-key-here'
ACCOUNT='mindee'
ENDPOINT='resume'
VERSION='1'
FILE_PATH='/path/to/your/file.png'

# Maximum number of polling attempts made to get the result of a queue
MAX_RETRIES=10

# Delay between requests, in seconds
DELAY=6

# Enqueue the document for async parsing
QUEUE_RESULT=$(curl -sS --request POST \
    -H "Authorization: Token $API_KEY" \
    -H "Content-Type: multipart/form-data" \
    -F "document=@$FILE_PATH" \
    "https://api.mindee.net/v1/products/$ACCOUNT/$ENDPOINT/v$VERSION/predict_async")

# Status code sent back from the server
STATUS_CODE=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']status_code[\"|']:[\s][\"|']*[a-zA-Z0-9-]*" | rev | cut --complement -f2- -d" " | rev)

# Check that the document was properly queued
if [ -z "$STATUS_CODE" ] || [ "$STATUS_CODE" -gt 399 ] || [ "$STATUS_CODE" -lt 200 ]
then
    # No status code could be extracted from the response at all
    if [ -z "$STATUS_CODE" ]
    then
        echo "Request couldn't be processed."
        exit 1
    fi
    echo "Error $STATUS_CODE was returned by API during enqueuing. "

    # Print the additional details, if there are any:
    ERROR=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']error[\"|']:[\s]\{[^\}]*" | rev | cut --complement -f2- -d"{" | rev)
    if [ -z "$ERROR" ]
    then
        exit 1
    fi

    # Details on the potential error:
    ERROR_CODE=$(echo "$ERROR" | grep -oP "[\"|']code[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev)
    MESSAGE=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']message[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev)
    DETAILS=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']details[\"|']:[\s]\"[^(\"|\')]*" | rev | cut --complement -f2- -d"\"" | rev)
    echo "This was the given explanation:"
    echo "-------------------------"
    echo "Error Code: $ERROR_CODE"
    echo "Message: $MESSAGE"
    echo "Details: $DETAILS"
    echo "-------------------------"
    exit 1
else
    echo "File sent, starting to retrieve from server..."

    # Get the document's queue ID
    QUEUE_ID=$(echo "$QUEUE_RESULT" | grep -oP "[\"|']id[\"|']:[\s][\"|'][a-zA-Z0-9-]*" | rev | cut --complement -f2- -d"\"" | rev)

    # Number of attempts made so far to retrieve the parsed document.
    # Starts at 0 and is incremented once per attempt, so exactly MAX_RETRIES
    # polls are made and the abort message reports the real attempt count.
    TIMES_TRIED=0

    # Try to fetch the file until we get it, or until we hit the maximum amount of retries
    while [ "$TIMES_TRIED" -lt "$MAX_RETRIES" ]
    do
        # Wait for a bit at each step
        sleep "$DELAY"
        TIMES_TRIED=$((TIMES_TRIED+1))

        # Note: we use -L here because the location of the file might be behind a redirection
        PARSED_RESULT=$(curl -sS -L \
            -H "Authorization: Token $API_KEY" \
            "https://api.mindee.net/v1/products/$ACCOUNT/$ENDPOINT/v$VERSION/documents/queue/$QUEUE_ID")

        # Isolating the job (queue) & the status to monitor the document
        JOB=$(echo "$PARSED_RESULT" | grep -ioP "[\"|']job[\"|']:[\s]\{[^\}]*" | rev | cut --complement -f2- -d"{" | rev)
        QUEUE_STATUS=$(echo "$JOB" | grep -ioP "[\"|']status[\"|']:[\s][\"|'][a-zA-Z0-9-]*" | rev | cut --complement -f2- -d"\"" | rev)
        if [ "$QUEUE_STATUS" = "completed" ]
        then
            # Print the result
            echo "$PARSED_RESULT"

            # Optional: isolate the document:
            # DOCUMENT=$(echo "$PARSED_RESULT" | grep -ioP "[\"|']document[\"|']:[\s].*([\"|']job[\"|'])" | rev | cut -f2- -d"," | rev)
            # echo "{$DOCUMENT}"

            # Remark: on compatible shells, fields can also be extracted through the use of tools like jq:
            # DOCUMENT=$(echo "$PARSED_RESULT" | jq '.["document"]')
            exit 0
        fi
    done
fi

# Only reached when every polling attempt finished without a "completed" status
echo "Operation aborted, document not retrieved after $TIMES_TRIED tries"
exit 1
<?php
use Mindee\Client;
use Mindee\Product\Resume\ResumeV1;
// Create the Mindee client with your API key
$client = new Client("my-api-key-here");
// Build an input source from a file on disk
$localSource = $client->sourceFromPath("/path/to/the/file.ext");
// Enqueue the file for the Resume V1 product and parse it asynchronously
$response = $client->enqueueAndParse(ResumeV1::class, $localSource);
// Output the parsed document
$document = $response->document;
echo $document;
- Replace my-api-key-here with your new API key, or use the "select an API key" feature and it will be filled automatically.
- Copy and paste the sample code of your desired choice in your application, code environment or terminal.
- Replace
/path/to/the/file.ext
(or /path/to/your/file.png in the command-line sample) with the path to your document.
Always remember to replace your API key!
- Run your code. You will receive a JSON response with the document details.
API Response
Here is the full JSON response you get when you call the API:
{
"api_request": {
"error": {},
"resources": [
"document",
"job"
],
"status": "success",
"status_code": 200,
"url": "https://api.mindee.net/v1/products/mindee/resume/v1/documents/a6601da5-d5cb-48c4-8d1a-de54d19b1bb5"
},
"document": {
"id": "a6601da5-d5cb-48c4-8d1a-de54d19b1bb5",
"inference": {
"extras": {},
"finished_at": "2024-10-29T10:21:14.778000",
"is_rotation_applied": true,
"pages": [
{
"extras": {},
"id": 0,
"orientation": {
"value": 0
},
"prediction": {}
}
],
"prediction": {
"address": {
"value": "177 Great Portland Street, London, W5W 6PQ"
},
"certificates": [
{
"grade": null,
"name": "PHP Framework (certificate):Zend,Codeigniter,Symfony",
"provider": null,
"year": null
},
{
"grade": null,
"name": "Programming Languages:JavaScript,HTML5, PHP OOP,CSS,SQL,MySQL",
"provider": null,
"year": null
}
],
"document_language": {
"value": "ENG"
},
"document_type": {
"value": "RESUME"
},
"education": [
{
"degree_domain": "Computer Information Systems",
"degree_type": "Bachelor of Science",
"end_month": null,
"end_year": null,
"school": "Columbia University, NY",
"start_month": null,
"start_year": "2014"
}
],
"email_address": {
"value": "[email protected]"
},
"given_names": [
{
"value": "Christopher"
}
],
"hard_skills": [
{
"value": "HTML5"
},
{
"value": "PHP OOP"
},
{
"value": "JavaScript"
},
{
"value": "CSS"
},
{
"value": "MySQL"
}
],
"job_applied": {
"value": null
},
"languages": [
{
"language": "SPA",
"level": "Fluent"
},
{
"language": "CHI",
"level": "Beginner"
},
{
"language": "GER",
"level": "Intermediate"
}
],
"nationality": {
"value": null
},
"phone_number": {
"value": "+44 (0)20 7666 8555"
},
"profession": {
"value": "Senior Web Developer"
},
"professional_experiences": [
{
"contract_type": null,
"department": null,
"employer": "Luna Web Design, New York",
"end_month": "05",
"end_year": "2019",
"role": "Web Developer",
"start_month": "09",
"start_year": "2015"
}
],
"social_networks_urls": [
{
"name": "LinkedIn",
"url": "linkedin.com/christopher.morgan"
}
],
"soft_skills": [
{
"value": "Project management"
},
{
"value": "Creative design"
},
{
"value": "Strong decision maker"
},
{
"value": "Innovative"
},
{
"value": "Complex problem solver"
},
{
"value": "Service-focused"
}
],
"surnames": [
{
"value": "Morgan"
}
]
},
"processing_time": 8.661,
"product": {
"features": [
"document_language",
"document_type",
"given_names",
"surnames",
"nationality",
"email_address",
"phone_number",
"address",
"social_networks_urls",
"profession",
"job_applied",
"languages",
"hard_skills",
"soft_skills",
"education",
"professional_experiences",
"certificates"
],
"name": "mindee/resume",
"type": "standard",
"version": "1.0"
},
"started_at": "2024-10-29T10:21:05.922000"
},
"n_pages": 1,
"name": "cv_templates_with_photo.jpg"
},
"job": {
"available_at": "2024-10-29T10:21:14.791000",
"error": {},
"id": "9d15c01c-cc12-4623-8ccb-3a42cdff781d",
"issued_at": "2024-10-29T10:21:05.922000",
"status": "completed"
}
}
You can find the document-level predictions under the prediction
key, located at document > inference > prediction.
It contains the different fields extracted at the document level, meaning that for multi-page PDFs, we reconstruct a single object using all the pages.
Extracted data
Using the above Resume example, the following are the basic fields that can be extracted.
- Document Language
- Document Type
- Given Names
- Surnames
- Nationality
- Email Address
- Phone Number
- Address
- Social Networks
- Profession
- Job Applied
- Languages
- Hard Skills
- Soft Skills
- Education
- Professional Experiences
- Certificates
Document Language
- document_language: The ISO 639 code of the language in which the document is written.
{
"document_language": {
"value": "ENG"
}
}
Document Type
- document_type: The type of the document sent. Possible values are the following:
- RESUME
- RECOMMENDATION_LETTER
- MOTIVATION_LETTER
{
"document_type": {
"value": "RESUME"
}
}
Given Names
- given_names: The candidate's first or given names.
{
"given_names": [
{
"value": "Christopher"
}
]
}
Surnames
- surnames: The candidate's last names.
{
"surnames": [
{
"value": "Morgan"
}
]
}
Nationality
- nationality: The ISO 3166 code for the country of citizenship of the candidate.
{
"nationality": {
"value": null
}
}
Email Address
- email_address: The email address of the candidate.
{
"email_address": {
"value": "[email protected]"
}
}
Phone Number
- phone_number: The phone number of the candidate.
{
"phone_number": {
"value": "+44 (0)20 7666 8555"
}
}
Address
- address: The location information of the candidate, including city, state, and country.
{
"address": {
"value": "177 Great Portland Street, London, W5W 6PQ"
}
}
Social Networks
- social_networks_urls: The list of social network profiles of the candidate.
- name: The name of the social network.
- url: The URL of the social network.
{
"social_networks_urls": [
{
"name": "LinkedIn",
"url": "linkedin.com/christopher.morgan"
}
]
}
Profession
- profession: The candidate's current profession.
{
"profession": {
"value": "Senior Web Developer"
}
}
Job Applied
- job_applied: The position that the candidate is applying for.
{
"job_applied": {
"value": null
}
}
Languages
- languages: The list of languages that the candidate is proficient in.
- language: The language's ISO 639 code.
- level: The candidate's level for the language.
{
"languages": [
{
"language": "SPA",
"level": "Fluent"
},
{
"language": "CHI",
"level": "Beginner"
},
{
"language": "GER",
"level": "Intermediate"
}
]
}
Hard Skills
- hard_skills: The list of the candidate's technical abilities and knowledge.
{
"hard_skills": [
{
"value": "HTML5"
},
{
"value": "PHP OOP"
},
{
"value": "JavaScript"
},
{
"value": "CSS"
},
{
"value": "MySQL"
}
]
}
Soft Skills
- soft_skills: The list of the candidate's interpersonal and communication abilities.
{
"soft_skills": [
{
"value": "Project management"
},
{
"value": "Creative design"
},
{
"value": "Strong decision maker"
},
{
"value": "Innovative"
},
{
"value": "Complex problem solver"
},
{
"value": "Service-focused"
}
]
}
Education
- education: The list of the candidate's educational background.
- school: The name of the school.
- degree_type: The type of degree obtained, such as Bachelor's, Master's, or Doctorate.
- degree_domain: The area of study or specialization.
- start_year: The year when the education program or course began.
- start_month: The month when the education program or course began.
- end_year: The year when the education program or course was completed.
- end_month: The month when the education program or course was completed.
{
"education": [
{
"degree_domain": "Computer Information Systems",
"degree_type": "Bachelor of Science",
"end_month": null,
"end_year": null,
"school": "Columbia University, NY",
"start_month": null,
"start_year": "2014"
}
]
}
Professional Experiences
- professional_experiences: The list of the candidate's professional experiences.
- employer: The name of the company or organization.
- role: The position or job title held by the candidate.
- department: The specific department or division within the company.
- contract_type: The type of contract for the professional experience.
- start_year: The year when the professional experience began.
- start_month: The month when the professional experience began.
- end_year: The year when the professional experience ended.
- end_month: The month when the professional experience ended.
{
"professional_experiences": [
{
"contract_type": null,
"department": null,
"employer": "Luna Web Design, New York",
"end_month": "05",
"end_year": "2019",
"role": "Web Developer",
"start_month": "09",
"start_year": "2015"
}
]
}
Certificates
- certificates: The list of certificates obtained by the candidate.
- year: The year when a certificate was issued or received.
- provider: The organization or institution that issued the certificate.
- name: The name of the certification.
- grade: The grade obtained for the certificate.
{
"certificates": [
{
"grade": null,
"name": "PHP Framework (certificate):Zend,Codeigniter,Symfony",
"provider": null,
"year": null
},
{
"grade": null,
"name": "Programming Languages:JavaScript,HTML5, PHP OOP,CSS,SQL,MySQL",
"provider": null,
"year": null
}
]
}
Questions?
Join our Slack
Updated about 2 months ago