---
# OpenAPI 3.0.3 description of the Scrapii REST API.
#
# Conventions used in this file:
#   - 2-space block indentation throughout.
#   - Response status codes are quoted ('200', not 200) so the keys stay
#     strings in both YAML and JSON renderings of the spec.
#   - Path parameters are declared once at the path-item level; each
#     operation keeps its own (empty) `parameters` list.
openapi: '3.0.3'
info:
  title: 'Scrapii API Documentation'
  # CommonMark text; a blank line separates paragraphs.
  description: |-
    Scrapii is a secure PII redaction and tokenization API built for
    privacy-first AI workflows. It detects and replaces sensitive data with
    non-sensitive tokens, allowing documents to be safely processed by public
    AI services and frontier LLMs without exposing regulated or personal
    information.

    Scrapii supports authorized detokenization, enabling clients to securely
    restore original values when required for downstream workflows, audits, or
    human review. For maximum trust and data sovereignty, Scrapii also supports
    client-controlled token encryption, ensuring sensitive values can only be
    decrypted by the client—never the platform.

    Designed for developers, Scrapii integrates seamlessly into automated
    pipelines via REST APIs, n8n workflows, or any existing processing
    pipeline, enabling low-friction redaction and de-identification across
    text, PDFs, and images. Deterministic token mapping preserves referential
    integrity while maintaining strict data boundaries.

    Scrapii is ideal for compliance-driven industries such as healthcare,
    legal, and finance, helping teams meet GDPR, HIPAA, SOC 2, and PIPEDA
    requirements while still leveraging modern AI, document processing, and
    automation tools.
  version: '1.0.0'

servers:
  - url: 'https://scrapii.net'

tags:
  - name: Endpoints
    description: ''

components:
  securitySchemes:
    # Bearer-token scheme; applied to every operation via the top-level
    # `security` requirement below.
    default:
      type: http
      scheme: bearer
      description: >-
        You can create API keys by visiting your dashboard and clicking
        Generate API Key. Include the API key in the Authorization header as:
        Bearer {apikey}

security:
  - default: []

paths:
  # Documents only the 401 (unauthenticated) response.
  /api/v1/test:
    get:
      summary: ''
      operationId: getApiV1Test
      description: ''
      parameters: []
      responses:
        '401':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: Unauthenticated.
                properties:
                  message:
                    type: string
                    example: Unauthenticated.
      tags:
        - Endpoints

  # Multipart upload of a document for PII detection and tokenization.
  /api/v1/document:
    post:
      summary: 'Upload and tokenize a document.'
      operationId: uploadAndTokenizeADocument
      description: 'Upload a document for PII detection and tokenization.'
      parameters: []
      responses:
        '201':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Document uploaded successfully.'
                  data:
                    id: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                    user_id: 1
                    mode: token
                    status: queued
                    original_filename: example.pdf
                    confidence_threshold: 0.85
                    entity_types:
                      - EMAIL_ADDRESS
                      - PHONE_NUMBER
                    progress_percentage: 0
                    current_step: 'Queued for processing'
                    created_at: '2024-01-01T00:00:00.000000Z'
                    updated_at: '2024-01-01T00:00:00.000000Z'
                properties:
                  message:
                    type: string
                    example: 'Document uploaded successfully.'
                  data:
                    type: object
                    properties:
                      id:
                        type: string
                        example: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                      user_id:
                        type: integer
                        example: 1
                      mode:
                        type: string
                        example: token
                      status:
                        type: string
                        example: queued
                      original_filename:
                        type: string
                        example: example.pdf
                      confidence_threshold:
                        type: number
                        example: 0.85
                      entity_types:
                        type: array
                        example:
                          - EMAIL_ADDRESS
                          - PHONE_NUMBER
                        items:
                          type: string
                      progress_percentage:
                        type: integer
                        example: 0
                      current_step:
                        type: string
                        example: 'Queued for processing'
                      created_at:
                        type: string
                        example: '2024-01-01T00:00:00.000000Z'
                      updated_at:
                        type: string
                        example: '2024-01-01T00:00:00.000000Z'
      tags:
        - Endpoints
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
                  description: 'The document file to process. Maximum size: 10 MB.'
                confidence_threshold:
                  type: number
                  description: 'Minimum confidence score (0.0-1.0) for PII detection.'
                  example: 0.85
                  nullable: true
                entity_types:
                  type: array
                  description: 'Optional array of specific PII entity types to detect.'
                  example:
                    - EMAIL_ADDRESS
                    - PHONE_NUMBER
                  items:
                    type: string
              required:
                - file

  # Tokenized content of a single processed document.
  '/api/v1/document/{document_id}':
    get:
      summary: 'Get the tokenized document content.'
      operationId: getTheTokenizedDocumentContent
      description: ''
      parameters: []
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Document content retrieved successfully.'
                  data:
                    content: 'Contact [TOKEN:EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e] for more information.'
                    document_id: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                    original_filename: example.pdf
                properties:
                  message:
                    type: string
                    example: 'Document content retrieved successfully.'
                  data:
                    type: object
                    properties:
                      content:
                        type: string
                        example: 'Contact [TOKEN:EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e] for more information.'
                      document_id:
                        type: string
                        example: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                      original_filename:
                        type: string
                        example: example.pdf
        '403':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'You do not have permission to access this document.'
                properties:
                  message:
                    type: string
                    example: 'You do not have permission to access this document.'
        '404':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Processed document file not found.'
                properties:
                  message:
                    type: string
                    example: 'Processed document file not found.'
        # Two distinct 409 bodies: still processing, or processing failed.
        '409':
          description: ''
          content:
            application/json:
              schema:
                oneOf:
                  - description: 'Not Ready'
                    type: object
                    example:
                      message: 'Document is not ready for download.'
                    properties:
                      message:
                        type: string
                        example: 'Document is not ready for download.'
                  - description: Failed
                    type: object
                    example:
                      message: 'Document processing failed and cannot be downloaded.'
                    properties:
                      message:
                        type: string
                        example: 'Document processing failed and cannot be downloaded.'
      tags:
        - Endpoints
    parameters:
      - in: path
        name: document_id
        description: 'The ID of the document.'
        example: 019c4ece-4c65-70c9-9b0b-09c861785a85
        required: true
        schema:
          type: string

  # Processing status and metadata of a single document.
  '/api/v1/document/{document_id}/status':
    get:
      summary: 'Get document status and details.'
      operationId: getDocumentStatusAndDetails
      description: ''
      parameters: []
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  data:
                    id: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                    user_id: 1
                    mode: token
                    status: completed
                    original_filename: example.pdf
                    confidence_threshold: 0.85
                    entity_types:
                      - EMAIL_ADDRESS
                      - PHONE_NUMBER
                    progress_percentage: 100
                    current_step: 'Processing complete'
                    created_at: '2024-01-01T00:00:00.000000Z'
                    updated_at: '2024-01-01T00:00:01.000000Z'
                properties:
                  data:
                    type: object
                    properties:
                      id:
                        type: string
                        example: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                      user_id:
                        type: integer
                        example: 1
                      mode:
                        type: string
                        example: token
                      status:
                        type: string
                        example: completed
                      original_filename:
                        type: string
                        example: example.pdf
                      confidence_threshold:
                        type: number
                        example: 0.85
                      entity_types:
                        type: array
                        example:
                          - EMAIL_ADDRESS
                          - PHONE_NUMBER
                        items:
                          type: string
                      progress_percentage:
                        type: integer
                        example: 100
                      current_step:
                        type: string
                        example: 'Processing complete'
                      created_at:
                        type: string
                        example: '2024-01-01T00:00:00.000000Z'
                      updated_at:
                        type: string
                        example: '2024-01-01T00:00:01.000000Z'
        '403':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'You do not have permission to access this document.'
                properties:
                  message:
                    type: string
                    example: 'You do not have permission to access this document.'
      tags:
        - Endpoints
    parameters:
      - in: path
        name: document_id
        description: 'The ID of the document.'
        example: 019c4ece-4c65-70c9-9b0b-09c861785a85
        required: true
        schema:
          type: string

  # Original (detokenized) content of a processed document.
  '/api/v1/document/{document_id}/detokenize':
    get:
      summary: 'Detokenize a processed document and return the original content.'
      operationId: detokenizeAProcessedDocumentAndReturnTheOriginalContent
      description: ''
      parameters: []
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Document detokenized successfully.'
                  data:
                    content: 'Contact john.doe@example.com for more information.'
                    document_id: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                    original_filename: example.pdf
                    tokens_found:
                      - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                    tokens_replaced:
                      - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                    tokens_failed: []
                    total_detokenizations: 1
                properties:
                  message:
                    type: string
                    example: 'Document detokenized successfully.'
                  data:
                    type: object
                    properties:
                      content:
                        type: string
                        example: 'Contact john.doe@example.com for more information.'
                      document_id:
                        type: string
                        example: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                      original_filename:
                        type: string
                        example: example.pdf
                      tokens_found:
                        type: array
                        example:
                          - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                        items:
                          type: string
                      tokens_replaced:
                        type: array
                        example:
                          - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                        items:
                          type: string
                      tokens_failed:
                        type: array
                        example: []
                        # OpenAPI 3.0 requires `items` on array schemas. The
                        # element shape is not shown by the empty example, so
                        # it is left unconstrained.
                        # NOTE(review): presumably token ID strings like
                        # tokens_found — confirm and tighten.
                        items: {}
                      total_detokenizations:
                        type: integer
                        example: 1
        '400':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Document is not tokenized'
                properties:
                  message:
                    type: string
                    example: 'Document is not tokenized'
        '403':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: Unauthorized
                properties:
                  message:
                    type: string
                    example: Unauthorized
      tags:
        - Endpoints
    parameters:
      - in: path
        name: document_id
        description: 'The ID of the document.'
        example: 019c4ece-4c65-70c9-9b0b-09c861785a85
        required: true
        schema:
          type: string

  # Combined plain-text download of several tokenized documents.
  /api/v1/documents:
    post:
      summary: 'Download multiple tokenized documents as a single combined file.'
      operationId: downloadMultipleTokenizedDocumentsAsASingleCombinedFile
      description: ''
      parameters: []
      responses:
        '200':
          description: Success
          content:
            text/plain:
              schema:
                type: string
                example: 'Returns a combined text file containing all requested tokenized documents.'
        '404':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Some documents were not found or do not belong to you.'
                  missing_document_ids:
                    - 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                properties:
                  message:
                    type: string
                    example: 'Some documents were not found or do not belong to you.'
                  missing_document_ids:
                    type: array
                    example:
                      - 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                    items:
                      type: string
        '422':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Some documents are not ready for download.'
                  not_ready_documents:
                    - id: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                      status: processing
                      error_message: null
                properties:
                  message:
                    type: string
                    example: 'Some documents are not ready for download.'
                  not_ready_documents:
                    type: array
                    example:
                      - id: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                        status: processing
                        error_message: null
                    items:
                      type: object
                      properties:
                        id:
                          type: string
                          example: 9d3a5c8e-4b2f-4a1e-8c3d-5e6f7a8b9c0d
                        status:
                          type: string
                          example: processing
                        error_message:
                          type: string
                          example: null
                          nullable: true
      tags:
        - Endpoints
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                document_ids:
                  type: array
                  description: 'Must be a valid UUID.'
                  example:
                    - 6ff8f7f6-1eb3-3525-be4a-3932c805afed
                  items:
                    type: string
                separator:
                  type: string
                  description: 'Must not be greater than 1000 characters.'
                  example: g
                  nullable: true
              required:
                - document_ids

  # Lookup of the original PII value behind a single token ID.
  '/api/v1/detokenize/{token}':
    get:
      summary: "Get a single token's original PII value."
      operationId: getASingleTokensOriginalPIIValue
      description: ''
      parameters: []
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Token retrieved successfully.'
                  data:
                    token_id: 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                    pii_value: john.doe@example.com
                properties:
                  message:
                    type: string
                    example: 'Token retrieved successfully.'
                  data:
                    type: object
                    properties:
                      token_id:
                        type: string
                        example: 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                      pii_value:
                        type: string
                        example: john.doe@example.com
        '404':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Token not found or access denied.'
                properties:
                  message:
                    type: string
                    example: 'Token not found or access denied.'
      tags:
        - Endpoints
    parameters:
      - in: path
        name: token
        description: ''
        example: architecto
        required: true
        schema:
          type: string

  # Detokenization of text supplied inline in a JSON body.
  /api/v1/detokenize:
    post:
      summary: 'Detokenize text content by replacing tokens with original PII values.'
      operationId: detokenizeTextContentByReplacingTokensWithOriginalPIIValues
      description: ''
      parameters: []
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Content detokenized successfully.'
                  data:
                    detokenized_content: 'Contact John Doe at john.doe@example.com or (555) 123-4567.'
                    tokens_found:
                      - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                      - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                    tokens_replaced:
                      - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                      - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                    tokens_failed: []
                    total_detokenizations: 2
                properties:
                  message:
                    type: string
                    example: 'Content detokenized successfully.'
                  data:
                    type: object
                    properties:
                      detokenized_content:
                        type: string
                        example: 'Contact John Doe at john.doe@example.com or (555) 123-4567.'
                      tokens_found:
                        type: array
                        example:
                          - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                          - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                        items:
                          type: string
                      tokens_replaced:
                        type: array
                        example:
                          - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                          - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                        items:
                          type: string
                      tokens_failed:
                        type: array
                        example: []
                        # OpenAPI 3.0 requires `items` on array schemas. The
                        # element shape is not shown by the empty example, so
                        # it is left unconstrained.
                        # NOTE(review): presumably token ID strings like
                        # tokens_found — confirm and tighten.
                        items: {}
                      total_detokenizations:
                        type: integer
                        example: 2
      tags:
        - Endpoints
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                file:
                  type: string
                  description: ''
                  example: null
                content:
                  type: string
                  description: 'Must not be greater than 10485760 characters.'
                  example: b
              required:
                - content

  # Detokenization of an uploaded text/csv file (multipart body).
  /api/v1/detokenize/file:
    post:
      summary: 'Detokenize uploaded text/csv files by replacing tokens with original PII values.'
      operationId: detokenizeUploadedTextcsvFilesByReplacingTokensWithOriginalPIIValues
      description: ''
      parameters: []
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'File detokenized successfully.'
                  data:
                    detokenized_content: 'Contact John Doe at john.doe@example.com or (555) 123-4567.'
                    tokens_found:
                      - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                      - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                    tokens_replaced:
                      - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                      - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                    tokens_failed: []
                    total_detokenizations: 2
                properties:
                  message:
                    type: string
                    example: 'File detokenized successfully.'
                  data:
                    type: object
                    properties:
                      detokenized_content:
                        type: string
                        example: 'Contact John Doe at john.doe@example.com or (555) 123-4567.'
                      tokens_found:
                        type: array
                        example:
                          - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                          - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                        items:
                          type: string
                      tokens_replaced:
                        type: array
                        example:
                          - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                          - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                        items:
                          type: string
                      tokens_failed:
                        type: array
                        example: []
                        # OpenAPI 3.0 requires `items` on array schemas. The
                        # element shape is not shown by the empty example, so
                        # it is left unconstrained.
                        # NOTE(review): presumably token ID strings like
                        # tokens_found — confirm and tighten.
                        items: {}
                      total_detokenizations:
                        type: integer
                        example: 2
      tags:
        - Endpoints
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
                  description: 'Must be a file. Must not be greater than 10240 kilobytes.'
                content:
                  type: string
                  description: ''
                  example: null
              required:
                - file

  # Batch lookup of original PII values for a list of token IDs.
  /api/v1/detokenize/tokens:
    post:
      summary: 'Detokenize tokens.'
      operationId: detokenizeTokens
      # Literal block scalar: backslashes in the regex are taken verbatim.
      description: |-
        Supply an array of token IDs to retrieve their original PII values.
        Token IDs follow the format: `TYPE:hash` (e.g., `PHONE_NUMBER:abc123...`).

        To extract token IDs from tokenized content, use this regex pattern:
        `/\[TOKEN:([A-Z_]+:[a-f0-9]+)\]/g`

        This will match tokens like:
        `[TOKEN:US_SSN:1d593fdba2209408e11e0384a9a257d2e058d1532ade7ac8c47e0f447b1edaaa]`

        And capture the token ID:
        `US_SSN:1d593fdba2209408e11e0384a9a257d2e058d1532ade7ac8c47e0f447b1edaaa`
      parameters: []
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                type: object
                example:
                  message: 'Token retrieval completed.'
                  data:
                    tokens:
                      - token: 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                        value: '(555) 123-4567'
                        found: true
                      - token: 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                        value: john.doe@example.com
                        found: true
                    total_requested: 2
                    total_found: 2
                    total_not_found: 0
                properties:
                  message:
                    type: string
                    example: 'Token retrieval completed.'
                  data:
                    type: object
                    properties:
                      tokens:
                        type: array
                        example:
                          - token: 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                            value: '(555) 123-4567'
                            found: true
                          - token: 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                            value: john.doe@example.com
                            found: true
                        items:
                          type: object
                          properties:
                            token:
                              type: string
                              example: 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                            value:
                              type: string
                              example: '(555) 123-4567'
                            found:
                              type: boolean
                              example: true
                      total_requested:
                        type: integer
                        example: 2
                      total_found:
                        type: integer
                        example: 2
                      total_not_found:
                        type: integer
                        example: 0
      tags:
        - Endpoints
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                tokens:
                  type: array
                  description: 'Array of token IDs to retrieve.'
                  example:
                    - 'PHONE_NUMBER:07bf49bd8a8796e3e120e873e51da7156904ffdf6049c74c37da599609a7caf4'
                    - 'EMAIL_ADDRESS:f26ab8cf5a26d76fd04f4f7a740b0c0e596975c528cb0028a365898fae44915e'
                  items:
                    type: string
              required:
                - tokens

  # Documented response is a 302 redirect to the hosted OpenAPI JSON.
  /api/openapi.json:
    get:
      summary: ''
      operationId: getApiOpenapiJson
      description: ''
      parameters: []
      responses:
        '302':
          description: ''
          content:
            text/plain:
              schema:
                type: string
                example: "\n\n \n \n \n\n Redirecting to https://scrapii.net/docs/openapi.json\n \n \n Redirecting to https://scrapii.net/docs/openapi.json.\n \n"
      tags:
        - Endpoints