Tesseract.js Worker in Chrome Extension MV3 (CSP & Blob issues)

258 views
Skip to first unread message

Tamar Y

unread,
Jul 21, 2025, 4:09:32 PM7/21/25
to Chromium Extensions

Hi:)
I'm building a Chrome extension that uses Tesseract.js for OCR, but I'm hitting two different issues depending on how I try to load the worker.

Here's the stripped-down relevant code:

// Create the Tesseract worker from the packaged worker script first; if that
// fails, retry once with a Blob URL built from the same script.
try {
  worker = await Tesseract.createWorker('eng', 1, {
    corePath: chrome.runtime.getURL('tesseract/tesseract-core.wasm.js'),
    workerPath: chrome.runtime.getURL('tesseract/worker.min.js'),
    langPath: chrome.runtime.getURL('tesseract/lang/'),
    workerBlobURL: false,
    logger: (m) => console.log(m),
  });
} catch (regularWorkerError) {
  console.warn('Regular worker failed, trying Blob fallback:', regularWorkerError);
  // Declared outside the inner try so the failure path can release it.
  let blobURL;
  try {
    blobURL = await createWorkerBlob();
    worker = await Tesseract.createWorker('eng', 1, {
      corePath: chrome.runtime.getURL('tesseract/tesseract-core.wasm.js'),
      workerPath: blobURL,
      langPath: chrome.runtime.getURL('tesseract/lang/'),
      logger: (m) => console.log(m),
    });
  } catch (blobWorkerError) {
    // No worker was created from the object URL, so nothing else will ever
    // release it; revoke it here instead of leaking the Blob.
    if (blobURL !== undefined) {
      URL.revokeObjectURL(blobURL);
    }
    console.error('Both methods failed:', blobWorkerError);
    // Keep the same message callers see, but attach the underlying failure
    // so it is not lost once the console output is gone.
    throw new Error('OCR unavailable', { cause: blobWorkerError });
  }
}

/**
 * Fetches the packaged Tesseract worker script and exposes it as an object URL.
 *
 * @returns {Promise<string>} A `blob:` URL for the fetched script. The caller
 *   owns it and should release it with `URL.revokeObjectURL` when done.
 * @throws {Error} If the script cannot be fetched successfully.
 */
async function createWorkerBlob() {
  const workerUrl = chrome.runtime.getURL('tesseract/worker.min.js');
  const res = await fetch(workerUrl);
  // fetch() only rejects on network-level failures; an error status would
  // otherwise be wrapped into a Blob and handed to the worker as "script".
  if (!res.ok) {
    throw new Error(`Failed to fetch ${workerUrl}: HTTP ${res.status}`);
  }
  const blob = await res.blob();
  return URL.createObjectURL(blob);
}

In manifest.json (MV3):

"web_accessible_resources": [
  {
    "resources": [
      "tesseract/*",
      "tesseract/lang/*",
      "worker.min.js",
      "tesseract-core.wasm.js",
      "eng.traineddata"
    ],
    "matches": ["<all_urls>"]
  }
]

I get the following errors:

1. Regular worker (direct URL) fails with:

Failed to construct 'Worker':
Script at 'chrome-extension://.../tesseract/worker.min.js' cannot be accessed from origin ''

2. Blob fallback fails with:
Refused to create a worker from 'blob:https://us-east-1.console.aws.amazon.com/...'
because it violates the following Content Security Policy directive: "script-src ..."

Any practical examples or workarounds would be super helpful 🙏

Thanks!

Tamar Y

unread,
Jul 22, 2025, 2:01:35 PM7/22/25
to Chromium Extensions, Tamar Y
Hi, an update from yesterday:

I tried to run OCR via tesseract.js inside an offscreen document. I'm using createWorker() and providing local paths for the required files (including the WASM core file).

My extension structure includes:

  • tesseract/worker.min.js

  • tesseract/tesseract-core.wasm.js

  • tesseract.min.js

  • tesseract/lang/eng.traineddata

    offscreen.js:
    // Handles RUN_OCR requests in the offscreen document and replies through
    // sendResponse with { success, result } or { success: false, error }.
    //
    // The listener itself is deliberately NOT async: an async function returns
    // a Promise, not the literal `true`, so the message channel would be closed
    // before sendResponse is called. The work runs in an inner async function
    // and the listener returns `true` synchronously to keep the channel open.
    chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
      if (message.type !== 'RUN_OCR') {
        return false;
      }

      // Runs one recognition pass and resolves with the TSV output.
      const recognizeTsv = async (imageURL) => {
        const { createWorker } = Tesseract;

        const worker = await createWorker('eng', 1, {
          workerPath: await createWorkerBlob(),
          langPath: chrome.runtime.getURL('tesseract/lang/'),
          corePath: chrome.runtime.getURL('tesseract/tesseract-core.wasm.js'),
          workerBlobURL: false,
        });

        try {
          await worker.setParameters({
            tessedit_pageseg_mode: 11,
            tessedit_ocr_engine_mode: 1,
            preserve_interword_spaces: '1',
          });

          const result = await worker.recognize(imageURL, {}, { tsv: true });
          return result.data.tsv;
        } finally {
          // Always shut the worker down, including when recognition throws.
          await worker.terminate();
        }
      };

      // Converts the outcome (success or failure) into exactly one reply.
      const respond = async () => {
        try {
          const tsv = await recognizeTsv(message.imageURL);
          sendResponse({ success: true, result: tsv });
        } catch (err) {
          sendResponse({ success: false, error: err.message });
        }
      };

      respond();
      return true;
    });

    /**
     * Fetches the packaged Tesseract worker script and exposes it as an object URL.
     *
     * @returns {Promise<string>} A `blob:` URL for the fetched script. The caller
     *   owns it and should release it with `URL.revokeObjectURL` when done.
     * @throws {Error} If the script cannot be fetched successfully.
     */
    async function createWorkerBlob() {
      const workerUrl = chrome.runtime.getURL('tesseract/worker.min.js');
      const response = await fetch(workerUrl);
      // fetch() only rejects on network-level failures; an error status would
      // otherwise be wrapped into a Blob and handed to the worker as "script".
      if (!response.ok) {
        throw new Error(`Failed to fetch ${workerUrl}: HTTP ${response.status}`);
      }
      const blob = await response.blob();
      return URL.createObjectURL(blob);
    }


    offscreen.html:

    <!DOCTYPE html>
    <html>
    <head>
      <meta charset="UTF-8" />
    </head>
    <body>
      <!-- Classic script: provides the global `Tesseract` that offscreen.js reads. -->
      <script src="tesseract.min.js"></script>
      <!-- Module scripts are deferred, so this runs after the classic script above. -->
      <script type="module" src="offscreen.js"></script>
    </body>
    </html>

    BG page:

    // Background side of the OCR round trip: makes sure the offscreen document
    // exists, forwards the RUN_OCR request to it, and relays whatever it
    // answers back to the original requester.
    //
    // The listener is deliberately NOT async: an async function returns a
    // Promise, not the literal `true`, so the channel to the requester would be
    // closed before any reply could be sent.
    chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
      if (message.type !== 'RUN_OCR') {
        return false;
      }

      // Resolves with the offscreen document's reply to the forwarded request.
      const forwardToOffscreen = async () => {
        await setupOffscreenDocument('offscreen/ocr-offscreen.html');
        return chrome.runtime.sendMessage({
          type: 'RUN_OCR',
          imageURL: message.imageURL,
        });
      };

      // Always answer exactly once, so the requester is never left waiting and
      // setup/forwarding failures do not surface as unhandled rejections.
      const relay = async () => {
        try {
          sendResponse(await forwardToOffscreen());
        } catch (err) {
          sendResponse({ success: false, error: err.message });
        }
      };

      relay();
      return true;
    });

    // In-flight chrome.offscreen.createDocument() promise, shared so that
    // overlapping calls wait on the same creation instead of starting another.
    let creating;

    /**
     * Ensures an offscreen document for `path` exists, creating it if needed.
     *
     * @param {string} path - Extension-relative path of the offscreen HTML page.
     * @returns {Promise<void>} Resolves once the document exists.
     * @throws Whatever `chrome.offscreen.createDocument` rejects with.
     */
    async function setupOffscreenDocument(path) {
      const offscreenUrl = chrome.runtime.getURL(path);
      const contexts = await chrome.runtime.getContexts({
        contextTypes: ['OFFSCREEN_DOCUMENT'],
        documentUrls: [offscreenUrl],
      });

      if (contexts.length > 0) return;

      // Another call is already creating the document: just wait for it.
      if (creating) {
        await creating;
        return;
      }

      creating = chrome.offscreen.createDocument({
        url: path,
        reasons: ['WORKERS'],
        justification: 'OCR via Tesseract in offscreen worker',
      });
      try {
        await creating;
      } finally {
        // Clear the marker on failure too; otherwise every later call would
        // await the same rejected promise and creation could never be retried.
        creating = null;
      }
    }

    However, I get this error when the offscreen page tries to create the worker:

    Refused to load the script 'chrome-extension://<EXT_ID>/offscreen/tesseract-core.wasm.js'
    because it violates the following Content Security Policy directive:
    "script-src 'self' 'wasm-unsafe-eval'"

    And also:
    Uncaught Error: NetworkError: Failed to execute 'importScripts' on 'WorkerGlobalScope'

    How can I correctly load the Tesseract WebAssembly core file (tesseract-core.wasm.js) in an offscreen document given that it's accessible via chrome.runtime.getURL(...) but blocked by the extension’s CSP when used in importScripts()?

    Any workaround or best practice to get this running without using remote resources?

    Thanks!

Patrick Kettner

unread,
Jul 23, 2025, 10:54:47 PM7/23/25
to Tamar Y, Chromium Extensions
Hey Tamar,
Out of curiosity, did you see the community example for Chrome extensions in Tesseract's README.md?

--
You received this message because you are subscribed to the Google Groups "Chromium Extensions" group.
To unsubscribe from this group and stop receiving emails from it, send an email to chromium-extens...@chromium.org.
To view this discussion visit https://groups.google.com/a/chromium.org/d/msgid/chromium-extensions/094ede13-b9ec-4db0-8ca2-a869f8c145d8n%40chromium.org.
Message has been deleted

Tamar Y

unread,
Jul 29, 2025, 3:57:02 AM7/29/25
to Chromium Extensions, Patrick Kettner, Chromium Extensions, Tamar Y
Hey Patrick, 

Thanks for sending me the direct link!
However, copying all the files in the dist folder (as shown in the example) also didn't solve my problem.

There is also an open issue in the Tesseract.js issue tracker about browser extensions on Manifest V3:
https://github.com/naptha/tesseract.js/issues/961

I hope that someone in the community solved that somehow... (It works only on websites without CSP restrictions.)
Message has been deleted

Tamar Y

unread,
Aug 1, 2025, 8:14:18 AM8/1/25
to Chromium Extensions, Tamar Y, Patrick Kettner, Chromium Extensions
Hey, 

I finally found a reliable solution by combining two approaches, and thought I'd share it in case anyone else may need it in the future:

  1. Using an offscreen document to safely execute OCR.

  2. Serving all Tesseract.js files locally from the extension.

1. Download Tesseract Locally

Place the necessary Tesseract dist files into your extension folder. Example:
/scripts/
  - tesseract.min.js
  - tesseract.js@v5.0.4_dist_worker.min.js
  - tesseract-core.wasm.js
  - /languages/
      - eng.traineddata

2. offscreen.html



<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8" />
  </head>
  <body>

    <!-- Classic script: provides the global `Tesseract` that offscreen.js reads. -->
    <script src="scripts/tesseract.min.js"></script>


    <!-- Module scripts are deferred, so this runs after the classic script above. -->
    <script type="module" src="offscreen.js"></script>
  </body>
</html>


3. offscreen.js


// Handles RUN_OCR requests in the offscreen document.
//
// Outcome delivery:
//   - success: broadcast as an OCR_RESULT message (as before) and also passed
//     to sendResponse so the request itself is settled;
//   - failure: passed to sendResponse as { success: false, error }.
//
// The listener is deliberately NOT async: an async function returns a Promise,
// not the literal `true`, so the channel would be closed before sendResponse
// runs and the error reply would never reach the sender.
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
  if (message.type !== 'RUN_OCR') {
    return false;
  }

  // Runs one recognition pass and resolves with the TSV output.
  const recognizeTsv = async (imageURL) => {
    const { createWorker } = Tesseract;

    const worker = await createWorker('eng', 1, {
      // NOTE(review): the list archive masked this name as "tesser...@v5.0.4_…";
      // restored here as the jsDelivr-style download name. Confirm it matches
      // the file actually placed in /scripts/.
      workerPath: chrome.runtime.getURL('scripts/tesseract.js@v5.0.4_dist_worker.min.js'),
      // corePath is the directory holding the tesseract-core files, not a file.
      corePath: chrome.runtime.getURL('scripts/'),
      langPath: chrome.runtime.getURL('scripts/languages/'),
      workerBlobURL: false,
      logger: (m) => console.log(m),
    });

    try {
      await worker.setParameters({
        tessedit_pageseg_mode: 11,
        tessedit_ocr_engine_mode: 1,
        preserve_interword_spaces: '1',
      });

      const result = await worker.recognize(imageURL, {}, { tsv: true });
      return result.data.tsv;
    } finally {
      // Always shut the worker down, including when recognition throws.
      await worker.terminate();
    }
  };

  // Runs OCR and reports the outcome; never rejects.
  const run = async () => {
    let data;
    try {
      data = { success: true, result: await recognizeTsv(message.imageURL) };
    } catch (err) {
      sendResponse({ success: false, error: err.message });
      return;
    }

    // Delivery problems (e.g. no listener) must not be mistaken for an OCR
    // failure, so they are handled separately from the try/catch above.
    chrome.runtime
      .sendMessage({ type: 'OCR_RESULT', data })
      .catch((err) => console.warn('OCR_RESULT could not be delivered:', err));
    sendResponse(data);
  };

  run();
  return true;
});

Reply all
Reply to author
Forward
0 new messages