From 0a27b11abca20c2f8dd72447a938c91bbd0e9820 Mon Sep 17 00:00:00 2001 From: Firdevs ARSLAN Date: Wed, 15 Apr 2026 10:24:53 +0200 Subject: [PATCH 1/4] docs(genapi): added ocr models --- .../how-to/query-ocr-models.mdx | 105 ++++++++++++++++++ pages/generative-apis/menu.ts | 6 +- 2 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 pages/generative-apis/how-to/query-ocr-models.mdx diff --git a/pages/generative-apis/how-to/query-ocr-models.mdx b/pages/generative-apis/how-to/query-ocr-models.mdx new file mode 100644 index 0000000000..d07dbca299 --- /dev/null +++ b/pages/generative-apis/how-to/query-ocr-models.mdx @@ -0,0 +1,105 @@ +--- +title: How to query OCR models +description: Learn how to interact with powerful OCR models using Scaleway's Generative APIs service. +tags: generative-apis ai-data ocr-models ocr-api +dates: + validation: 2026-04-14 + posted: 2026-04-14 +--- +import Requirements from '@macros/iam/requirements.mdx' + +Scaleway's Generative APIs service allows users to interact with powerful OCR (Optical Character Recognition) models hosted on the platform. + +OCR models can extract structured text from documents such as PDFs and images, preserving formatting and layout in the output. + + +OCR models are currently available via the [OCR API](https://www.scaleway.com/en/developers/api/generative-apis/#path-ocr-beta-create-a-text-extraction) only and are not yet integrated into the Scaleway console playground. + + + + +- A Scaleway account logged in to the [console](https://console.scaleway.com) +- [Owner](/iam/concepts/#owner) status or [IAM permissions](/iam/concepts/#permission) allowing you to perform actions in the intended Organization +- A valid [API key](/iam/how-to/create-api-keys/) for API authentication +- Python 3.7+ installed on your system + +## Query OCR models via API + +You can query the models programmatically using your favorite tools or languages. +In the example that follows, we will use the MistralAI Python client. 
+ +### Install the MistralAI SDK + +Install the MistralAI SDK using pip: + +```bash +pip install mistralai +``` + +### Initialize the client + +Initialize the MistralAI client with your base URL and API key: + +```python +from mistralai.client import Mistral +import os + +# Initialize the client with your server URL and API key +mistral = Mistral( + server_url="https://api.scaleway.ai", # Scaleway's Generative APIs service URL + api_key="" # Your unique API secret key from Scaleway +) +``` + +### Generate an OCR text extraction + +You can now generate a text extraction. +In the example below, a sample PDF file ([scaleway-impact-report-10-pages.pdf](https://genapi-documentation-assets.s3.fr-par.scw.cloud/scaleway-impact-report-10-pages.pdf)) is sent to the OCR model via a public URL. The extracted text from each page is written to a local Markdown file. + +```python +# Generate a text extraction using the 'mistral-ocr-2512' model +FILE_URL = "https://genapi-documentation-assets.s3.fr-par.scw.cloud/scaleway-impact-report-10-pages.pdf" +MODEL = "mistral-ocr-2512" + +res = mistral.ocr.process( + model=MODEL, + document={ + "document_url": FILE_URL, + "type": "document_url", + } +) + +filename = FILE_URL.split("/")[-1].split(".")[0] +with open(f"{filename}.md", "w") as f: + for page in res.pages: + f.write(page.markdown) + +# Print the generated response +print(f"File processed. Result markdown file stored in: {filename}.md") +``` + + This code sends a message to the model and returns an answer based on your input. The `temperature`, `max_completion_tokens`, and `top_p` parameters control the response's creativity, length, and diversity, respectively. + +A conversation style may include a default system prompt. You may set this prompt by setting the first message with the role system. For example: + + ```python + [ + { + "role": "system", + "content": "You are Xavier Niel." + }, + { + "role": "user", + "content": "Hello, what is your name?" 
+ } + ] + ``` + + Once the script completes, a Markdown file named `scaleway-impact-report-10-pages.md` is created in the current directory, containing the extracted and formatted text from each page of the PDF. + + +You can replace `FILE_URL` with the URL of any publicly accessible PDF or image file. +The input file or image must be stored in Scaleway Object Storage and referenced by its URL. + + +Refer to the dedicated [OCR API documentation](https://www.scaleway.com/en/developers/api/generative-apis/#path-ocr-beta-create-a-text-extraction) for a full list of all available parameters. \ No newline at end of file diff --git a/pages/generative-apis/menu.ts b/pages/generative-apis/menu.ts index 4dc37eaebf..c71605a70f 100644 --- a/pages/generative-apis/menu.ts +++ b/pages/generative-apis/menu.ts @@ -42,7 +42,11 @@ export const generativeApisMenu = { label: 'Query audio models', slug: 'query-audio-models' }, - { + { + label: 'Query OCR models', + slug: 'query-ocr-models' + }, + { label: 'Query reranking models', slug: 'query-reranking-models' }, From 91c0c8efc506b13cf6beb32bb4ab5ad66aff1781 Mon Sep 17 00:00:00 2001 From: Firdevs ARSLAN Date: Wed, 15 Apr 2026 10:36:24 +0200 Subject: [PATCH 2/4] Removed text --- .../how-to/query-ocr-models.mdx | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/pages/generative-apis/how-to/query-ocr-models.mdx b/pages/generative-apis/how-to/query-ocr-models.mdx index d07dbca299..e881081b6e 100644 --- a/pages/generative-apis/how-to/query-ocr-models.mdx +++ b/pages/generative-apis/how-to/query-ocr-models.mdx @@ -78,24 +78,7 @@ with open(f"{filename}.md", "w") as f: print(f"File processed. Result markdown file stored in: {filename}.md") ``` - This code sends a message to the model and returns an answer based on your input. The `temperature`, `max_completion_tokens`, and `top_p` parameters control the response's creativity, length, and diversity, respectively. 
- -A conversation style may include a default system prompt. You may set this prompt by setting the first message with the role system. For example: - - ```python - [ - { - "role": "system", - "content": "You are Xavier Niel." - }, - { - "role": "user", - "content": "Hello, what is your name?" - } - ] - ``` - - Once the script completes, a Markdown file named `scaleway-impact-report-10-pages.md` is created in the current directory, containing the extracted and formatted text from each page of the PDF. +Once the script completes, a Markdown file named `scaleway-impact-report-10-pages.md` is created in the current directory, containing the extracted and formatted text from each page of the PDF. You can replace `FILE_URL` with the URL of any publicly accessible PDF or image file. From 16e1389221088b513a26d61b94edd0bc16f67694 Mon Sep 17 00:00:00 2001 From: Firdevs ARSLAN Date: Wed, 15 Apr 2026 10:39:00 +0200 Subject: [PATCH 3/4] Fix link --- pages/generative-apis/how-to/query-ocr-models.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pages/generative-apis/how-to/query-ocr-models.mdx b/pages/generative-apis/how-to/query-ocr-models.mdx index e881081b6e..12756e6681 100644 --- a/pages/generative-apis/how-to/query-ocr-models.mdx +++ b/pages/generative-apis/how-to/query-ocr-models.mdx @@ -54,7 +54,7 @@ mistral = Mistral( ### Generate an OCR text extraction You can now generate a text extraction. -In the example below, a sample PDF file ([scaleway-impact-report-10-pages.pdf](https://genapi-documentation-assets.s3.fr-par.scw.cloud/scaleway-impact-report-10-pages.pdf)) is sent to the OCR model via a public URL. The extracted text from each page is written to a local Markdown file. +In the example below, the sample PDF file, [scaleway-impact-report-10-pages.pdf](https://genapi-documentation-assets.s3.fr-par.scw.cloud/scaleway-impact-report-10-pages.pdf), is sent to the OCR model via a public URL. 
The extracted text from each page is written to a local Markdown file. ```python # Generate a text extraction using the 'mistral-ocr-2512' model From f4b325274d5401769a023b8c51db83a773ef0cb7 Mon Sep 17 00:00:00 2001 From: Firdevs ARSLAN Date: Mon, 20 Apr 2026 15:16:38 +0200 Subject: [PATCH 4/4] fix upon review --- .../how-to/query-ocr-models.mdx | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/pages/generative-apis/how-to/query-ocr-models.mdx b/pages/generative-apis/how-to/query-ocr-models.mdx index 12756e6681..5bccc4ffac 100644 --- a/pages/generative-apis/how-to/query-ocr-models.mdx +++ b/pages/generative-apis/how-to/query-ocr-models.mdx @@ -42,7 +42,6 @@ Initialize the MistralAI client with your base URL and API key: ```python from mistralai.client import Mistral -import os # Initialize the client with your server URL and API key mistral = Mistral( @@ -51,6 +50,10 @@ mistral = Mistral( ) ``` + +This code sample requires `mistralai >= 2.0.0`. For `mistralai <= 1.12.4` (also named `v1`), replace `from mistralai.client import Mistral` with `from mistralai import Mistral`. + + ### Generate an OCR text extraction You can now generate a text extraction. @@ -82,7 +85,36 @@ Once the script completes, a Markdown file named `scaleway-impact-report-10-page You can replace `FILE_URL` with the URL of any publicly accessible PDF or image file. -The input file or image must be stored in Scaleway Object Storage and referenced by its URL. +For example, you can provide a file from Object Storage using an [Object Storage pre-signed URL](https://www.scaleway.com/en/docs/object-storage/how-to/access-objects-via-https/). +Alternatively, you can provide a local PDF file by encoding it in Base64 and passing it as a data URI in `document_url`, as shown below: 
+ +```python +import base64 + +FILE_PATH = "path/to/your/file.pdf" +MODEL = "mistral-ocr-2512" + +with open(FILE_PATH, "rb") as file: + file_content = file.read() + encoded_file= base64.b64encode(file_content).decode("utf-8") + +res = mistral.ocr.process( + model=MODEL, + document={ + "document_url": f"data:application/pdf;base64,{encoded_file}", + "type": "document_url", + } +) + +filename = FILE_PATH.split("/")[-1].split(".")[0] +with open(f"{filename}.md", "w") as f: + for page in res.pages: + f.write(page.markdown) + +# Print the generated response +print(f"File processed. Result markdown file stored in: {filename}.md") +``` + Refer to the dedicated [OCR API documentation](https://www.scaleway.com/en/developers/api/generative-apis/#path-ocr-beta-create-a-text-extraction) for a full list of all available parameters. \ No newline at end of file