import torch
from doctr.datasets import VOCABS
from doctr.io import DocumentFile
from doctr.models import ocr_predictor, crnn_vgg16_bn, db_resnet50
# Run docTR OCR on a single Hindi image, using a DBNet detector with
# pretrained weights and a CRNN recognizer whose weights come from a local
# checkpoint file.

# Vocab copied from the indic-doctr repo.
# NOTE(review): presumably this must match, character for character and in
# order, the vocab the checkpoint was trained with -- confirm against the
# indic-doctr release.
vocab = 'ॲऽऐथफएऎह८॥ॉम९ुँ१ं।षघठर॓ॼड़गछिॱटऩॄऑवल५ढ़य़अञसऔयण॑क़॒ौॽशऍ॰ूीऒॊख़उज़ॻॅ३ओऌळनॠ०ेढङ४़ॢग़पऊॐज२डैभझकआदबऋखॾ॔ोइ्धतफ़ईृःा६चऱऴ७-'

# Build the recognizer without any pretrained weights; they are loaded from
# the local checkpoint below.
reco_model = crnn_vgg16_bn(pretrained=False, pretrained_backbone=False, vocab=vocab)

# Download: https://github.com/iitb-research-code/indic-doctr/releases/download/model2/crnn_vgg16_bn_hindi.pt
local_model_path = "crnn_vgg16_bn_hindi.pt"
# `torch` must be imported at the top of the file for this call to resolve.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (consider `weights_only=True` if the installed torch
# version supports it).
reco_params = torch.load(local_model_path, map_location="cpu")
reco_model.load_state_dict(reco_params)

# Combine the pretrained detector with the custom recognizer.
predictor = ocr_predictor(det_arch="db_resnet50", reco_arch=reco_model, pretrained=True)

single_img_doc = DocumentFile.from_images("0022-0024_3_5_2.jpg")
result = predictor(single_img_doc)
print(result.pages[0].export())  # print the exported result of the first page
I get an empty result (no text is returned) when running this on a Hindi image. How can I fix this? Please help.
Originally posted by @manit2004 in #1617