IndicBERT v2 is a multilingual BERT model trained on IndicCorp v2, covering 24 Indic languages. It performs competitively with strong baselines and achieves the best results on 7 out of 9 tasks in the IndicXTREME benchmark.
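As a minimal usage sketch, the snippet below loads the model with the Hugging Face transformers library and runs masked-token prediction. The checkpoint identifier `ai4bharat/IndicBERTv2-MLM-only` and the Hindi example sentence are assumptions for illustration; substitute the identifier of the actually published checkpoint.

```python
# Minimal sketch: masked-token prediction with Hugging Face transformers.
# The checkpoint name "ai4bharat/IndicBERTv2-MLM-only" is an assumption;
# replace it with the released IndicBERT v2 checkpoint identifier.
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

model_name = "ai4bharat/IndicBERTv2-MLM-only"  # assumed checkpoint id
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)

# Mask one token in a Hindi sentence (illustrative example text).
text = f"भारत एक महान {tokenizer.mask_token} है।"
inputs = tokenizer(text, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# Decode the highest-scoring prediction at the [MASK] position.
mask_index = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
predicted_id = logits[0, mask_index].argmax(dim=-1)
print(tokenizer.decode(predicted_id))
```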
Citation
@misc{doddapaneni2022indicxtreme,
  doi = {10.48550/ARXIV.2212.05409},
  url = {https://arxiv.org/abs/2212.05409},
  author = {Doddapaneni, Sumanth and Aralikatte, Rahul and Ramesh, Gowtham and Goyal, Shreya and Khapra, Mitesh M. and Kunchukuttan, Anoop and Kumar, Pratyush},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences},
  title = {IndicXTREME: A Multi-Task Benchmark For Evaluating Indic Languages},
  publisher = {arXiv},
  year = {2022},
  copyright = {arXiv.org perpetual, non-exclusive license}
}