This module contains all the various helper functions used in the other modules.
Important
Note: This notebook contains a large collection of profane and offensive words, which are used to build a word filter. It is not recommended for children or highly sensitive readers.
Initialize self. See help(type(self)) for accurate signature.
# Smoke test for FastTextLanguageDetector: load the pretrained detector once
# and reuse it for each check below. get_language() is unpacked as a
# (language code, probability) pair.
fasttext_model = FastTextLanguageDetector.from_pretrained()

# test spanish
lang, prob = fasttext_model.get_language("Hola, como estas?")
assert lang == "es"
assert prob > 0.9

# test english
lang, prob = fasttext_model.get_language("Hello, how are you?")
assert lang == "en"
assert prob > 0.9

# test combination: for mixed-language input only the confidence is checked,
# which is expected to stay below the 0.9 threshold used above.
lang, prob = fasttext_model.get_language("Hello, how are you? Hola, como estas?")
assert prob < 0.9
Warning : `load_model` does not return WordVectorModel or SupervisedModel any more, but a `FastText` object which is very similar.
# test with multiple lines: input containing a single newline should still be
# detected as English with high confidence.
lang, prob = fasttext_model.get_language("Hello, how are you?\nI am fine, thank you.")
assert lang == "en"
assert prob > 0.9

# Same check with a blank line (two consecutive newlines) between sentences.
lang, prob = fasttext_model.get_language("Hello, how are you?\n\nI am fine, thank you.")
assert lang == "en"
assert prob > 0.9
# check pickling works
import pickle

# Round-trip the detector through a pickle file on disk. The file is written
# by this same cell immediately before it is read back.
with open("/tmp/fasttext_model.pkl", "wb") as f:
    pickle.dump(fasttext_model, f)
with open("/tmp/fasttext_model.pkl", "rb") as f:
    pickled_fasttext_model = pickle.load(f)

# The restored detector must give exactly the same prediction as the original.
lang, prob = fasttext_model.get_language("Hello, how are you?")
p_lang, p_prob = pickled_fasttext_model.get_language("Hello, how are you?")
assert lang == p_lang
assert prob == p_prob

# ...and must compare equal to the original object.
assert pickled_fasttext_model == fasttext_model
Warning : `load_model` does not return WordVectorModel or SupervisedModel any more, but a `FastText` object which is very similar.
The following code has been copied from the awesome Hugging Face Space by edugp
Initialize self. See help(type(self)) for accurate signature.
To run this test, you need to have kenlm installed: pip install https://github.com/kpu/kenlm/archive/master.zip
# Load the pretrained KenLM model with the options used for this test.
model = KenlmModel.from_pretrained(
    model_dataset="wikipedia",
    language="en",
    lower_case=True,
    remove_accents=True,
    normalize_numbers=True,
    punctuation=1,
)

# Get perplexity
perplex_1 = model.get_perplexity("I am very perplexed")
perplex_2 = model.get_perplexity("im hella trippin")

# The first sentence is expected to score a lower perplexity than the slang one.
assert perplex_1 < perplex_2
/home/nathan/miniconda3/envs/squeakily/lib/python3.10/site-packages/huggingface_hub/file_download.py:592: FutureWarning: `cached_download` is the legacy way to download files from the HF hub, please consider upgrading to `hf_hub_download`
warnings.warn(