Skip to content

Commit cc7e6a5

Browse files
DevinTDHaahmedlone127jsl-modelsprabodAbdullahMubeenAnwar
authored
Merge Model Hub (#14635)
* Add model 2025-04-09-gpt2_bash_history_baseline_pipeline_en * Add model 2025-04-09-flutterbot_en * Add model 2025-04-07-frankietime_en * Add model 2025-04-09-hushem_5x_deit_small_sgd_0001_fold4_en * Add model 2025-04-08-bmu_finetuned_pubmedbert_mlm_medquad_en * Add model 2025-04-09-db_slr_1_1e_pipeline_en * Add model 2025-04-09-saudiah_repat_someone_470_pipeline_en * Add model 2025-04-09-sent_cxr_bioclinicalbert_v1_en * Add model 2025-04-09-xlsr_vorarlbergerisch_en * Add model 2025-04-09-sent_bert_base_multilingual_uncased_pretrained_xx * Add model 2025-04-08-emotion_classification_dafa_w_en * Add model 2025-04-09-wav2vec2_base_timit_demo_google_colab_atgarcia_pipeline_en * Add model 2025-04-08-bert_finetuned_ner_cti_en * Add model 2025-04-09-sent_arabic_monomodel_monotok_en * Add model 2025-04-09-sent_schwurpert_pipeline_de * Add model 2025-04-08-wav2vec2_large_xls_r_300m_hindi_devendr_en * Add model 2025-04-08-dialogpt_medium_harry_pipeline_en * Add model 2025-04-09-gpt_2_finetuning_airaid_en * Add model 2025-04-08-mchammer_pipeline_en * Add model 2025-04-09-wav2vec2_large_xls_r_300m_kor_11385_2_en * Add model 2025-04-09-sent_bert_base_stackoverflow_comments_2m_pipeline_en * Add model 2025-04-08-shape_nato_pipeline_en * Add model 2025-04-09-burmese_awesome_wnut_model_ai_pipeline_en * Add model 2025-04-09-vit_female_age_classification_en * Add model 2025-04-09-vit_base_oxford_iiit_pets_niko132_pipeline_en * Add model 2025-04-09-koriposting_en * Add model 2025-04-09-rockdrigoma_pipeline_en * Add model 2025-04-09-vit_base_patch16_224_finetuned_cedar_en * Add model 2025-04-09-williamblakebot_pipeline_en * Add model 2025-04-09-bert_base_train_book_ent_15p_ra_en * Add model 2025-04-09-tinybert_train_book_ent_15p_en * Add model 2025-04-08-exp_w2v2t_indonesian_xlsr_53_s358_id * Add model 2025-04-08-bert_finetuned_ner_accelerate_atichets_pipeline_en * Add model 2025-04-09-brad_buchsbaum_en * Add model 2025-04-09-honeytech_pipeline_en * Add model 
2025-04-09-extended_gender_classifier_en * Add model 2025-04-09-smids_1x_deit_tiny_rms_001_fold3_pipeline_en * Add model 2025-04-09-icelynjennings_pipeline_en * Add model 2025-04-09-jackposobiec_pipeline_en * Add model 2025-04-09-sent_finnish_monomodel_monotok_pipeline_en * Add model 2025-04-08-exp5_10partition_modelo_asl6000_pipeline_en * Add model 2025-04-08-output_pipeline_pt * Add model 2025-04-09-bert_finetuned_ner_huizhoucheng_en * Add model 2025-04-09-icelynjennings_en * Add model 2025-04-09-sent_tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en * Add model 2025-04-09-sent_drclips_en * Add model 2025-04-09-sent_nbme_bio_clinicalbert_en * Add model 2025-04-09-finetune_model_bert_en * Add model 2025-04-09-bert_finetuned_ner_fundrais123_en * Add model 2025-04-09-filler_username_pipeline_en * Add model 2025-04-09-gpt2_chatbot_kuttersn_en * Add model 2025-04-09-musebiihi_pipeline_en * Add model 2025-04-09-disconcision_pipeline_en * Add model 2025-04-09-arxiv_classifier_debertav3_en * Add model 2025-04-08-wenger_en * Add model 2025-04-08-burmese_awesome_model_recod_en * Add model 2025-04-09-exp_w2v2t_portuguese_norwegian_pretraining_s84_pt * Add model 2025-04-09-sent_bert_base_uncased_finetuned_mol_mlm_0_3_en * Add model 2025-04-09-sent_tlm_rct_20k_large_scale_pipeline_en * Add model 2025-04-08-jen_122_pipeline_en * Add model 2025-04-09-dkulchar_pipeline_en * Add model 2025-04-09-pico8degalaleo_pipeline_en * Add model 2025-04-09-dialogpt_medium_captainprice_extended_en * Add model 2025-04-09-wav2vec2_gujarati_stt_pipeline_en * Add model 2025-04-08-smids_5x_deit_small_rms_00001_fold1_en * Add model 2025-04-09-sent_minilm_l12_h384_uncased_finetuned_imdb_en * Add model 2025-04-09-bert_suicide_detection_hk_large_nepal_bhasa_pipeline_en * Add model 2025-04-09-distilbert_base_uncased_news_sentiment_finetuned_english_en * Add model 2025-04-08-monopolyfornite_en * Add model 2025-04-08-dialogpt_small_shy_en * Add model 
2025-04-09-distilbert_token_itr0_0_0001_editorials_01_03_2022_15_20_12_pipeline_en * Add model 2025-04-09-kehlani_pipeline_en * Add model 2025-04-09-burmese_awesome_humanaction_model_pipeline_en * Add model 2025-04-09-tigers_side_vit_en * Add model 2025-04-09-stp_classifier_13_1_en * Add model 2025-04-08-nepali_grammar_error_detection_20250311_1323_en * Add model 2025-04-09-mldz4shad_en * Add model 2025-04-09-exp_w2v2t_swedish_northern_sami_xlsr_53_s328_en * Add model 2025-04-09-bert_base_uncased_token_itr0_0_0001_train_essays_test_test_set_05_03_2022_05_58_31_en * Add model 2025-04-09-wav2vec2_xlsr_53_marathi_large_en * Add model 2025-04-09-hushem_5x_deit_small_adamax_0001_fold1_pipeline_en * Add model 2025-04-09-lora_toxic_comment_pipeline_en * Add model 2025-04-09-absa_turkish_bert_based_small_tr * Add model 2025-04-08-smids_1x_deit_tiny_rms_001_fold5_en * Add model 2025-04-09-wav2vec2_base_timit_demo_colab_bsen_pipeline_en * Add model 2025-04-09-bert_base_turkish_sentiment_analysis_pipeline_tr * Add model 2025-04-09-bert_base_turkish_sentiment_analysis_tr * Add model 2025-04-09-bert_base_turkish_offensive_pipeline_tr * Add model 2025-04-09-document_type_identification_en * Add model 2025-04-09-sent_bnbert_pipeline_en * Add model 2025-04-09-wav2vec2_large_xls_r_300m_tamil_colab_aakhilesh_en * Add model 2025-04-08-sent_mbert_tlm_sent_english_chinese_en * Add model 2025-04-08-pii_protection_model_pipeline_en * Add model 2025-04-09-bert_tiny_finetuned_xglue_ner_en * Add model 2025-04-08-wav2vec2_large_xls_r_300m_urdu_colab_pipeline_en * Add model 2025-04-09-sent_bert_base_uncased_issues_128_xxr_pipeline_en * Add model 2025-04-09-sent_mbert_tlm_chat_english_german_en * Add model 2025-04-09-db_slr_1_1e_en * Add model 2025-04-08-cher_pipeline_en * Add model 2025-04-09-wav2vec2_base_libir_zenodo_pipeline_en * Add model 2025-04-09-vit_epochs5_batch32_lr5e_05_size224_tiles4_seed3_q3_dropout_v2_en * Add model 2025-04-09-wav2vec2_base_test_pipeline_en * Add model 
2025-04-09-lesseyecontact_en * Add model 2025-04-09-wav2vec2_base_swbd_turn_eos_long_short_utt_removed_5percent_pipeline_en * Add model 2025-04-09-micbucci_pipeline_en * Add model 2025-04-09-veganseltzer_pipeline_en * Add model 2025-04-08-dialogpt_medium_ff7_en * Add model 2025-04-09-sent_storieslm_v1_1945_pipeline_en * Add model 2025-04-09-sent_mbert_tlm_chat_english_chinese_pipeline_en * Add model 2025-04-09-dialogpt_medium_milo_en * Add model 2025-04-09-dataandme_en * Add model 2025-04-09-lumetroid_en * Add model 2025-04-09-dialogpt_medium_milo_pipeline_en * Add model 2025-04-09-bbcqos_fitslut63_kellyg_official_en * Add model 2025-04-09-stp_classifier_13_1_pipeline_en * Add model 2025-04-09-vit_base_beans_demo_v5_hwooo92_pipeline_en * Add model 2025-04-09-ridiculouscrabs_en * Add model 2025-04-08-autotrain_20_12_2022_exam_part3_2543877946_pipeline_en * Add model 2025-04-09-zemfira_en * Add model 2025-04-09-michaeltrazzi_pipeline_en * Add model 2025-04-09-absa_turkish_bert_based_small_pipeline_tr * Add model 2025-04-09-gunna_pipeline_en * Add model 2025-04-09-ourqueeningreen_pipeline_en * Add model 2025-04-09-jenslennartsson_pipeline_en * Add model 2025-04-09-sent_bottleneckbertsmall_en * Add model 2025-04-09-dialogpt_mid_hpai_en * Add model 2025-04-09-shelbythanna_en * Add model 2025-04-09-macintoxic_en * Add model 2025-04-09-square_rundi_square_rundi_second_vote_full_pic_25_age_gender_en * Add model 2025-04-09-sent_first_try_rubert_200_16_16_25ep_en * Add model 2025-04-09-postpostpostr_en * Add model 2025-04-09-richardsocher_en * Add model 2025-04-09-bert_base_german_cased_finetuned_subj_v1_pipeline_en * Add model 2025-04-09-guggersylvain_pipeline_en * Add model 2025-04-09-guggersylvain_en * Add model 2025-04-09-macegrunow_en * Add model 2025-04-09-macegrunow_pipeline_en * Add model 2025-04-09-nueclear333_pipeline_en * Add model 2025-04-09-olikuchi_en * Add model 2025-04-09-wav2vec2_large_xlsr_53_full_train_full_train_pipeline_en * Add model 
2025-04-09-lanalilligant_en * Add model 2025-04-08-peppa_pipeline_en * Add model 2025-04-08-3_epochs_classifier_en * Add model 2025-04-08-bert_base_greek_uncased_v1_finetuned_ner_pipeline_en * Add model 2025-04-09-deit_base_patch16_224_rice_leaf_disease_augmented_tagalog_pipeline_en * Add model 2025-04-08-wav2vec2_large_xlsr_estonian_m3hrdadfi_pipeline_et * Add model 2025-04-08-sent_bert_base_uncased_multi_128_pipeline_en * Add model 2025-04-09-mspunks_en * Add model 2025-04-09-mspunks_pipeline_en * Add model 2025-04-09-vit_base_patch16_224_masaratti_pipeline_en * Add model 2025-04-09-burmese_awesome_emotion_identifier_model_en * Add model 2025-04-09-wav2vec2_large_xls_r_300m_chichewa_colab_en * Add model 2025-04-09-lesseyecontact_pipeline_en * Add model 2025-04-07-dialogpt_small_rick_havokx_pipeline_en * Add model 2025-04-08-wav2vec2_large_uralic_voxpopuli_v2_sami_parl_ext_ft_en * Add model 2025-04-09-dnlklr_pipeline_en * Add model 2025-04-09-wav2vec2_base_cynthia_timit_pipeline_en * Add model 2025-04-09-mri_classifier_djibri_pipeline_en * 2025-04-11-smolvlm_instruct_int4_en (#14550) * Add model 2025-04-11-smolvlm_instruct_int4_en * Add model 2025-04-14-paligemma_3b_pt_224_int4_en * Add model 2025-04-15-paligemma_3b_ft_vqav2_448_int4_en * Add model 2025-04-15-paligemma_3b_pt_224_int4_en * Add model 2025-04-15-paligemma2_3b_pt_448_int4_en * Add model 2025-04-15-paligemma2_3b_mix_224_int4_en * Add model 2025-04-28-gemma_3_4b_it_int4_en * Add model 2025-04-28-gemma_3_4b_pt_int4_en --------- Co-authored-by: prabod <prabod@rathnayaka.me> * 2025-05-16-internvl2_1b_int4_en (#14577) * Add model 2025-05-16-internvl2_1b_int4_en * Add model 2025-05-16-internvl2_5_1b_int4_en * Add model 2025-05-16-internvl3_1b_int4_en * Add model 2025-05-16-internvl3_2b_int4_en * Add model 2025-05-16-internvl3_8b_int4_en * Add model 2025-05-16-internvl2_5_4b_int4_en * Add model 2025-05-27-florence_2_base_ft_int4_en * Add model 2025-05-27-florence_2_base_int4_en * Add model 
2025-05-27-florence_2_large_ft_int4_en * Add model 2025-05-27-florence_2_large_int4_en --------- Co-authored-by: prabod <prabod@rathnayaka.me> * 2025-05-17-internvl3_8b_int4_en (#14580) * Add model 2025-05-17-internvl3_8b_int4_en * Add model 2025-05-20-mmarco_mminilmv2_l12_h384_v1_nreimers_en * Add model 2025-05-20-mmarco_mminilmv2_l12_h384_v1_nreimers_pipeline_en * Add model 2025-05-20-bge_reranker_base_baai_en * Add model 2025-05-20-xlm_roberta_base_language_detection_xx * Add model 2025-05-20-bge_reranker_base_baai_pipeline_en * Add model 2025-05-20-xlm_roberta_base_language_detection_pipeline_xx * Add model 2025-05-20-twitter_xlm_roberta_base_sentiment_multilingual_xx * Add model 2025-05-20-korean_reranker_ko * Add model 2025-05-20-korean_reranker_pipeline_ko * Add model 2025-05-20-twitter_xlm_roberta_base_sentiment_multilingual_pipeline_xx * Add model 2025-05-20-bce_reranker_base_v1_maidalun1020_pipeline_en * Add model 2025-05-20-bce_reranker_base_v1_maidalun1020_en * Add model 2025-05-20-multilingual_iptc_news_topic_classifier_xx * Add model 2025-05-20-bge_reranker_v2_m3_en * Add model 2025-05-20-multilingual_iptc_news_topic_classifier_pipeline_xx * Add model 2025-05-20-bge_reranker_v2_m3_pipeline_en * Add model 2025-05-20-xlm_roberta_base_romanian_ner_ronec_ro * Add model 2025-05-20-xlm_roberta_ner_japanese_ja * Add model 2025-05-20-xlm_roberta_base_romanian_ner_ronec_pipeline_ro * Add model 2025-05-20-xlm_roberta_ner_japanese_pipeline_ja * Add model 2025-05-20-xlm_roberta_large_finetuned_conll03_english_xx * Add model 2025-05-20-xlm_roberta_large_finetuned_conll03_german_xx * Add model 2025-05-20-fullstop_punctuation_multilang_large_en * Add model 2025-05-20-xlm_roberta_large_finetuned_conll03_english_pipeline_xx * Add model 2025-05-20-fullstop_punctuation_multilang_large_pipeline_en * Add model 2025-05-20-xlm_roberta_large_finetuned_conll03_german_pipeline_xx * Add model 2025-05-20-xlm_roberta_large_ner_spanish_es * Add model 
2025-05-20-sent_twitter_xlm_roberta_base_en * Add model 2025-05-20-sent_twitter_xlm_roberta_base_pipeline_en * Add model 2025-05-20-sent_infoxlm_base_en * Add model 2025-05-20-sent_mminilmv2_l12_h384_distilled_from_xlmr_large_en * Add model 2025-05-20-sent_infoxlm_base_pipeline_en * Add model 2025-05-20-sent_mminilmv2_l12_h384_distilled_from_xlmr_large_pipeline_en * Add model 2025-05-20-sent_infoxlm_large_en * Add model 2025-05-20-sent_xlm_roberta_large_xx * Add model 2025-05-21-clip_vit_base_patch16_en * Add model 2025-05-21-fashion_clip_en * Add model 2025-05-21-clip_vit_base_patch16_pipeline_en * Add model 2025-05-21-fashion_clip_pipeline_en * Add model 2025-05-21-zero_shot_classifier_clip_vit_base_patch32_en * Add model 2025-05-21-zero_shot_classifier_clip_vit_base_patch32_pipeline_en * Add model 2025-05-21-clip_vit_large_patch14_336_en * Add model 2025-05-21-xlmroberta_qa_ukrainian_uk * Add model 2025-05-21-xlmroberta_qa_ukrainian_pipeline_uk * Add model 2025-05-21-xlm_roberta_qa_xlm_roberta_base_arabic_ar * Add model 2025-05-21-xlm_roberta_qa_xlm_roberta_base_arabic_pipeline_ar * Add model 2025-05-21-xlm_roberta_qa_xlm_roberta_base_squad2_distilled_en * Add model 2025-05-21-xlm_roberta_qa_xlm_roberta_base_squad2_distilled_pipeline_en * Add model 2025-05-21-xlmr_large_qa_persian_farsi_fa * Add model 2025-05-21-persian_xlm_roberta_large_en * Add model 2025-05-21-xlmr_large_qa_persian_farsi_pipeline_fa * Add model 2025-05-21-persian_xlm_roberta_large_pipeline_en * Add model 2025-05-21-xlm_roberta_large_qa_multilingual_finedtuned_russian_xx * Add model 2025-05-21-xlm_roberta_large_qa_multilingual_finedtuned_russian_pipeline_xx * Add model 2025-05-21-xlm_roberta_large_xquad_en * Add model 2025-05-21-xlm_roberta_large_xquad_pipeline_en * Add model 2025-05-21-mminilmv2_l12_h384_distilled_from_xlmr_large_en * Add model 2025-05-21-mminilmv2_l12_h384_distilled_from_xlmr_large_pipeline_en * Add model 2025-05-21-twitter_xlm_roberta_base_en * Add model 
2025-05-21-twitter_xlm_roberta_base_pipeline_en * Add model 2025-05-21-xlm_roberta_base_xx * Add model 2025-05-21-xlm_roberta_base_pipeline_xx * Add model 2025-05-21-infoxlm_large_en * Add model 2025-05-21-infoxlm_base_en * Add model 2025-05-21-infoxlm_base_pipeline_en * Add model 2025-05-21-xlm_roberta_large_xx * Add model 2025-05-21-xlm_v_base_xx * Add model 2025-05-21-infoxlm_large_pipeline_en * Add model 2025-05-21-xlm_roberta_large_pipeline_xx * Add model 2025-05-21-xlm_v_base_pipeline_xx * Add model 2025-05-21-robbert_v2_dutch_ner_nl * Add model 2025-05-21-roberta_large_ner_english_en * Add model 2025-05-21-robbert_v2_dutch_ner_pipeline_nl * Add model 2025-05-21-roberta_large_tweetner7_all_en * Add model 2025-05-21-roberta_large_ner_english_pipeline_en * Add model 2025-05-21-roberta_token_classifier_sayula_popoluca_tagger_id * Add model 2025-05-21-roberta_token_classifier_sayula_popoluca_tagger_pipeline_id * Add model 2025-05-21-roberta_large_tweetner7_all_pipeline_en * Add model 2025-05-22-twitter_roberta_base_sentiment_en * Add model 2025-05-22-roberta_hate_speech_dynabench_r4_target_en * Add model 2025-05-22-twitter_roberta_base_sentiment_latest_en * Add model 2025-05-22-robertuito_sentiment_analysis_pipeline_es * Add model 2025-05-22-roberta_base_go_emotions_en * Add model 2025-05-22-roberta_hate_speech_dynabench_r4_target_pipeline_en * Add model 2025-05-22-roberta_classifier_emotion_english_distil_base_pipeline_en * Add model 2025-05-22-robertuito_sentiment_analysis_es * Add model 2025-05-22-twitter_roberta_base_sentiment_latest_pipeline_en * Add model 2025-05-22-twitter_roberta_base_sentiment_pipeline_en * Add model 2025-05-22-roberta_classifier_emotion_english_distil_base_en * Add model 2025-05-22-roberta_large_mnli_pipeline_en * Add model 2025-05-22-roberta_large_mnli_en * Add model 2025-05-22-roberta_base_go_emotions_pipeline_en * Add model 2025-05-22-twitter_roberta_base_sentiment_latest_en * Add model 
2025-05-22-roberta_hate_speech_dynabench_r4_target_en * Add model 2025-05-22-twitter_roberta_base_sentiment_en * Add model 2025-05-22-robertuito_sentiment_analysis_pipeline_es * Add model 2025-05-22-twitter_roberta_base_sentiment_pipeline_en * Add model 2025-05-22-roberta_base_go_emotions_pipeline_en * Add model 2025-05-22-roberta_hate_speech_dynabench_r4_target_pipeline_en * Add model 2025-05-22-roberta_classifier_emotion_english_distil_base_en * Add model 2025-05-22-robertuito_sentiment_analysis_es * Add model 2025-05-22-roberta_large_mnli_en * Add model 2025-05-22-roberta_large_mnli_pipeline_en * Add model 2025-05-22-roberta_classifier_emotion_english_distil_base_pipeline_en * Add model 2025-05-22-roberta_base_go_emotions_en * Add model 2025-05-22-twitter_roberta_base_sentiment_latest_pipeline_en * Add model 2025-05-22-distilroberta_base_en * Add model 2025-05-22-codebert_python_en * Add model 2025-05-22-distilroberta_base_pipeline_en * Add model 2025-05-22-roberta_base_en * Add model 2025-05-22-chemberta_zinc_base_v1_en * Add model 2025-05-22-roberta_base_pipeline_en * Add model 2025-05-22-roberta_large_en * Add model 2025-05-22-chemberta_zinc_base_v1_pipeline_en * Add model 2025-05-22-codebert_python_pipeline_en * Add model 2025-05-22-roberta_large_pipeline_en * Add model 2025-05-22-amd_power_dialer_v1_en * Add model 2025-05-22-coherence_all_mpnet_base_v2_en * Add model 2025-05-22-information_content_model_en * Add model 2025-05-22-icelandic_nepal_bhasa_dataset_teacher_model_en * Add model 2025-05-22-amd_full_phonetree_v1_pipeline_en * Add model 2025-05-22-amd_partial_phonetree_v1_en * Add model 2025-05-22-amd_partial_v1_en * Add model 2025-05-22-burmese_setfit_classifier_threat_en * Add model 2025-05-22-coherence_all_mpnet_base_v2_pipeline_en * Add model 2025-05-22-hub_report_20241202125641_pipeline_en * Add model 2025-05-22-amd_partial_v1_pipeline_en * Add model 2025-05-22-amd_partial_phonetree_v1_pipeline_en * Add model 
2025-05-22-burmese_setfit_classifier_threat_pipeline_en * Add model 2025-05-22-setfit_model_en * Add model 2025-05-22-icelandic_nepal_bhasa_dataset_teacher_model_pipeline_en * Add model 2025-05-22-setfit_model_pipeline_en * Add model 2025-05-22-amd_power_dialer_v1_pipeline_en * Add model 2025-05-22-hub_report_20241202125641_en * Add model 2025-05-22-amd_full_phonetree_v1_en * Add model 2025-05-22-information_content_model_pipeline_en * Add model 2025-05-22-autotrain_kjxi3_hql8x_en * Add model 2025-05-22-multi_qa_mpnet_base_dot_v1_finetuned_squad2_all_en * Add model 2025-05-22-covid_qa_mpnet_en * Add model 2025-05-22-multi_qa_mpnet_base_dot_v1_finetuned_squad2_all_pipeline_en * Add model 2025-05-22-covid_qa_mpnet_pipeline_en * Add model 2025-05-22-autotrain_kjxi3_hql8x_pipeline_en * Add model 2025-05-22-multi_qa_mpnet_base_cos_v1_sentence_transformers_en * Add model 2025-05-22-multi_qa_mpnet_base_dot_v1_en * Add model 2025-05-22-paraphrase_mpnet_base_v2_en * Add model 2025-05-22-patentsberta_en * Add model 2025-05-22-all_mpnet_base_v2_sentence_transformers_pipeline_en * Add model 2025-05-22-multi_qa_mpnet_base_cos_v1_sentence_transformers_pipeline_en * Add model 2025-05-22-patentsberta_pipeline_en * Add model 2025-05-22-fin_mpnet_base_en * Add model 2025-05-22-nli_mpnet_base_v2_en * Add model 2025-05-22-fin_mpnet_base_pipeline_en * Add model 2025-05-22-biolord_2023_c_en * Add model 2025-05-22-all_mpnet_base_v2_sentence_transformers_en * Add model 2025-05-22-paraphrase_mpnet_base_v2_pipeline_en * Add model 2025-05-22-biolord_2023_pipeline_en * Add model 2025-05-22-multi_qa_mpnet_base_dot_v1_pipeline_en * Add model 2025-05-22-biolord_2023_c_pipeline_en * Add model 2025-05-22-biolord_2023_en * Add model 2025-05-22-nli_mpnet_base_v2_pipeline_en * Add model 2025-05-22-e5_small_v2_intfloat_en * Add model 2025-05-22-e5_small_en * Add model 2025-05-22-e5_small_v2_intfloat_pipeline_en * Add model 2025-05-22-e5_small_pipeline_en * Add model 
2025-05-22-e5_base_v2_intfloat_pipeline_en * Add model 2025-05-22-e5_base_pipeline_en * Add model 2025-05-22-sentence_transformers_e5_large_v2_en * Add model 2025-05-22-e5_large_en * Add model 2025-05-22-sentence_transformers_e5_large_v2_pipeline_en * Add model 2025-05-22-e5_base_en * Add model 2025-05-22-e5_base_v2_intfloat_en * Add model 2025-05-22-e5_large_pipeline_en * Add model 2025-05-22-e5_large_v2_intfloat_en * Add model 2025-05-22-e5_large_v2_intfloat_pipeline_en * Add model 2025-05-24-e5_small_en * Add model 2025-05-24-e5_small_v2_intfloat_en * Add model 2025-05-24-e5_small_pipeline_en * Add model 2025-05-24-e5_base_v2_intfloat_pipeline_en * Add model 2025-05-24-e5_small_v2_intfloat_pipeline_en * Add model 2025-05-24-sentence_transformers_e5_large_v2_en * Add model 2025-05-24-e5_base_v2_intfloat_en * Add model 2025-05-24-e5_large_en * Add model 2025-05-24-e5_base_pipeline_en * Add model 2025-05-24-sentence_transformers_e5_large_v2_pipeline_en * Add model 2025-05-24-e5_large_v2_intfloat_en * Add model 2025-05-24-e5_large_pipeline_en * Add model 2025-05-24-e5_base_en * Add model 2025-05-25-distilbert_tok_classifier_typo_detector_en * Add model 2025-05-25-biomedical_ner_all_d4data_en * Add model 2025-05-25-distilbert_ner_distilbert_base_cased_finetuned_conll03_english_en * Add model 2025-05-25-distilbert_ner_distilbert_base_cased_finetuned_conll03_english_pipeline_en * Add model 2025-05-25-distilbert_finetuned_ai4privacy_v2_en * Add model 2025-05-25-distilbert_ner_distilbert_base_multilingual_cased_ner_hrl_nl * Add model 2025-05-25-biomedical_ner_all_d4data_pipeline_en * Add model 2025-05-25-distilbert_base_multilingual_cased_pii_xx * Add model 2025-05-25-distilbert_token_classifier_keyphrase_extraction_inspec_pipeline_en * Add model 2025-05-25-chonky_distilbert_base_uncased_1_en * Add model 2025-05-25-distilbert_ner_dslim_en * Add model 2025-05-25-distilbert_tok_classifier_typo_detector_pipeline_en * Add model 
2025-05-25-chonky_distilbert_base_uncased_1_pipeline_en * Add model 2025-05-25-distilbert_finetuned_ai4privacy_v2_pipeline_en * Add model 2025-05-25-distilbert_base_multilingual_cased_pii_pipeline_xx * Add model 2025-05-25-distilbert_ner_distilbert_base_multilingual_cased_ner_hrl_pipeline_nl * Add model 2025-05-25-distilbert_ner_dslim_pipeline_en * Add model 2025-05-25-distilbert_token_classifier_keyphrase_extraction_inspec_en * Add model 2025-05-25-distilbert_base_uncased_go_emotions_student_en * Add model 2025-05-25-toxic_comment_model_en * Add model 2025-05-25-nsfw_text_classifier_en * Add model 2025-05-25-distilbert_nsfw_text_classifier_pipeline_en * Add model 2025-05-25-distilbert_base_uncased_go_emotions_student_pipeline_en * Add model 2025-05-25-toxic_comment_model_pipeline_en * Add model 2025-05-25-nsfw_text_classifier_pipeline_en * Add model 2025-05-25-distilbert_nsfw_text_classifier_en * Add model 2025-05-25-multilingual_sentiment_analysis_xx * Add model 2025-05-25-multilingual_sentiment_analysis_pipeline_xx * Add model 2025-05-27-thainer_corpus_v2_base_model_th * Add model 2025-05-27-thainer_corpus_v2_base_model_pipeline_th * Add model 2025-05-27-phayathaibert_thainer_th * Add model 2025-05-27-nermembert_base_4entities_fr * Add model 2025-05-27-cas_biomedical_sayula_popoluca_tagging_fr * Add model 2025-05-27-phayathaibert_thainer_pipeline_th * Add model 2025-05-27-nermembert_large_3entities_fr * Add model 2025-05-27-nermembert_large_3entities_pipeline_fr * Add model 2025-05-27-cas_biomedical_sayula_popoluca_tagging_pipeline_fr * Add model 2025-05-27-nermembert_base_4entities_pipeline_fr * Add model 2025-05-27-rubert_base_cased_nli_threeway_ru * Add model 2025-05-27-rubert_base_cased_nli_threeway_pipeline_ru --------- Co-authored-by: ahmedlone127 <ahmedlone127@gmail.com> * Add model 2025-06-10-e5v_int4_en (#14599) Co-authored-by: prabod <prabod@rathnayaka.me> * Add model 2025-06-23-minilm_l6_v2_en * Add model 2025-06-22-bert_classifier_finbert_tone_en * 
Add model 2025-06-22-bert_classifier_finbert_tone_pipeline_en * Add model 2025-06-22-finbert_pipeline_en * Add model 2025-06-22-bert_base_multilingual_uncased_sentiment_xx * Add model 2025-06-22-bert_base_multilingual_uncased_sentiment_pipeline_xx * Add model 2025-06-22-finbert_en * Add model 2025-06-22-bert_base_multilingual_cased_google_bert_xx * Add model 2025-06-22-bert_base_multilingual_cased_google_bert_pipeline_xx * Add model 2025-06-22-bert_base_uncased_google_bert_en * Add model 2025-06-22-bert_base_cased_google_bert_pipeline_en * Add model 2025-06-22-bert_base_cased_google_bert_en * Add model 2025-06-22-bert_base_uncased_google_bert_pipeline_en * Add model 2025-06-22-sent_bert_base_multilingual_cased_xx * Add model 2025-06-22-sent_bert_base_multilingual_cased_pipeline_xx * Add model 2025-06-22-sent_bert_base_cased_en * Add model 2025-06-22-sent_bert_base_cased_pipeline_en * Add model 2025-06-22-sent_bert_base_uncased_pipeline_en * Add model 2025-06-22-sent_bert_base_uncased_en * Add model 2025-06-22-camembert_bio_base_fr * Add model 2025-06-22-camembert_bio_base_pipeline_fr * Add model 2025-06-22-drbert_7gb_fr * Add model 2025-06-22-umberto_commoncrawl_cased_v1_it * Add model 2025-06-22-camembert_base_fr * Add model 2025-06-22-umberto_commoncrawl_cased_v1_pipeline_it * Add model 2025-06-22-drbert_7gb_pipeline_fr * Add model 2025-06-22-sloberta_pipeline_sl * Add model 2025-06-22-camembert_base_pipeline_fr * Add model 2025-06-22-sloberta_sl * Add model 2025-06-22-wangchanberta_finetuned_sentiment_th * Add model 2025-06-22-wangchanberta_finetuned_sentiment_pipeline_th * Add model 2025-06-22-feel_italian_italian_emotion_it * Add model 2025-06-22-feel_italian_italian_sentiment_it * Add model 2025-06-22-finance_sentiment_french_base_fr * Add model 2025-06-22-feel_italian_italian_emotion_pipeline_it * Add model 2025-06-22-finance_sentiment_french_base_pipeline_fr * Add model 2025-06-22-ag_nli_dets_sentence_similarity_v4_pipeline_xx * Add model 
2025-06-22-ag_nli_dets_sentence_similarity_v4_xx * Add model 2025-06-22-feel_italian_italian_sentiment_pipeline_it * Add model 2025-06-24-efficient_splade_vietnamese_bt_large_doc_en * Add model 2025-06-24-distilbert_base_cased_en * Add model 2025-06-24-distilbert_base_multilingual_cased_pipeline_xx * Add model 2025-06-24-distilbert_base_german_cased_de * Add model 2025-06-24-distilbert_base_cased_pipeline_en * Add model 2025-06-24-distilbert_base_multilingual_cased_xx * Add model 2025-06-24-distilbert_base_uncased_en * Add model 2025-06-24-opensearch_neural_sparse_encoding_v2_distill_en * Add model 2025-06-24-opensearch_neural_sparse_encoding_v2_distill_pipeline_en * Add model 2025-06-24-distilbert_base_uncased_pipeline_en * Add model 2025-06-24-efficient_splade_vietnamese_bt_large_doc_pipeline_en * Add model 2025-06-24-clinicalbert_pipeline_en * Add model 2025-06-24-opensearch_neural_sparse_encoding_doc_v2_distill_pipeline_en * Add model 2025-06-24-opensearch_neural_sparse_encoding_doc_v2_distill_en * Add model 2025-06-24-clinicalbert_en * Add model 2025-06-24-distilbert_base_german_cased_pipeline_de * Add model 2025-06-24-tiny_distilbert_base_cased_distilled_squad_en * Add model 2025-06-24-tiny_distilbert_base_cased_distilled_squad_pipeline_en * Add model 2025-06-24-distilbert_base_uncased_distilled_squad_distilbert_en * Add model 2025-06-24-distilbert_base_uncased_distilled_squad_distilbert_pipeline_en * Add model 2025-06-24-question_answering_v2_pipeline_en * Add model 2025-06-24-distilbert_base_cased_distilled_squad_distilbert_en * Add model 2025-06-24-distilbert_base_uncased_finetuned_squad_full_pipeline_en * Add model 2025-06-24-distilbert_base_cased_distilled_squad_distilbert_pipeline_en * Add model 2025-06-24-question_answering_v2_en * Add model 2025-06-24-distilbert_base_uncased_finetuned_squad_full_en * Add model 2025-06-24-hubert_large_japanese_asr_ja * Add model 2025-06-24-hubert_large_arabic_egyptian_ar * Add model 
2025-06-24-hubert_large_japanese_asr_pipeline_ja * Add model 2025-06-24-hubert_large_arabic_egyptian_pipeline_ar * Add model 2025-06-24-distilbart_mnli_12_6_en * Add model 2025-06-24-distilbart_mnli_12_3_en * Add model 2025-06-24-distilbart_mnli_12_6_pipeline_en * Add model 2025-06-24-distilbart_mnli_12_1_en * Add model 2025-06-24-awesome_fb_model_en * Add model 2025-06-24-distilbart_mnli_12_3_pipeline_en * Add model 2025-06-24-distilbart_mnli_12_1_pipeline_en * Add model 2025-06-24-distilbart_mnli_12_9_pipeline_en * Add model 2025-06-24-bart_mnli_cnn_256_pipeline_en * Add model 2025-06-24-distilbart_mnli_12_9_en * Add model 2025-06-24-bart_mnli_cnn_256_en * Add model 2025-06-24-awesome_fb_model_pipeline_en * Add model 2025-06-24-bart_large_mnli_yahoo_answers_joeddav_pipeline_en * Add model 2025-06-24-bart_large_mnli_yahoo_answers_joeddav_en * Add model 2025-07-03-phi_3.5_mini_instruct_int4_en * 2025-07-15-bge_medembed_base_v0_1_openvino_en (#14629) * Add model 2025-07-15-bge_medembed_base_v0_1_openvino_en * Add model 2025-07-15-bge_medembed_large_v0_1_openvino_en * Add model 2025-07-15-all_mpnet_base_v2_openvino_en * Update 2025-07-15-bge_medembed_base_v0_1_openvino_en.md * Update 2025-07-15-bge_medembed_large_v0_1_openvino_en.md * Add model 2025-07-18-nuextract_2.0_2B_en --------- Co-authored-by: AbdullahMubeenAnwar <bdllhmubeen@gmail.com> Co-authored-by: Abdullah mubeen <77073730+AbdullahMubeenAnwar@users.noreply.github.com> --------- Co-authored-by: ahmedlone127 <ahmedlone127@gmail.com> Co-authored-by: jsl-models <74001263+jsl-models@users.noreply.github.com> Co-authored-by: prabod <prabod@rathnayaka.me> Co-authored-by: AbdullahMubeenAnwar <bdllhmubeen@gmail.com> Co-authored-by: Abdullah mubeen <77073730+AbdullahMubeenAnwar@users.noreply.github.com>
1 parent a0eb537 commit cc7e6a5

File tree

4 files changed

+549
-0
lines changed

4 files changed

+549
-0
lines changed
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
---
2+
layout: model
3+
title: all-mpnet-base-v2 from sentence-transformers OpenVINO
4+
author: John Snow Labs
5+
name: all_mpnet_base_v2_openvino
6+
date: 2025-07-15
7+
tags: [openvino, english, embedding, open_source, mpnet, en]
8+
task: Embeddings
9+
language: en
10+
edition: Spark NLP 6.0.0
11+
spark_version: 3.0
12+
supported: true
13+
engine: openvino
14+
annotator: MPNetEmbeddings
15+
article_header:
16+
type: cover
17+
use_language_switcher: "Python-Scala-Java"
18+
---
19+
20+
## Description
21+
22+
This is a sentence-transformers model: it maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for tasks such as clustering or semantic search.
23+
24+
This model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector that captures the semantic information. The sentence vector may be used for information retrieval, clustering, or sentence similarity tasks.
25+
26+
By default, input text longer than 384 word pieces is truncated.
27+
28+
{:.btn-box}
29+
<button class="button button-orange" disabled>Live Demo</button>
30+
<button class="button button-orange" disabled>Open in Colab</button>
31+
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_openvino_en_6.0.0_3.0_1752610809513.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
32+
[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_openvino_en_6.0.0_3.0_1752610809513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
33+
34+
## How to use
35+
36+
37+
38+
<div class="tabs-box" markdown="1">
39+
{% include programmingLanguageSelectScalaPythonNLU.html %}
40+
```python
41+
from sparknlp.base import DocumentAssembler
42+
from sparknlp.annotator import MPNetEmbeddings
43+
from pyspark.ml import Pipeline
44+
45+
document_assembler = DocumentAssembler()\
46+
.setInputCol("text")\
47+
.setOutputCol("document")
48+
49+
mpnet_loaded = MPNetEmbeddings.load("all_mpnet_base_v2_openvino")\
50+
.setInputCols(["document"])\
51+
.setOutputCol("mpnet_embeddings")
52+
53+
pipeline = Pipeline(
54+
stages = [
55+
document_assembler,
56+
mpnet_loaded
57+
])
58+
59+
data = spark.createDataFrame([
60+
['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist.']
61+
]).toDF("text")
62+
63+
model = pipeline.fit(data)
64+
result = model.transform(data)
65+
66+
result.selectExpr("explode(mpnet_embeddings.embeddings) as embeddings").show()
67+
68+
```
69+
```scala
70+
import com.johnsnowlabs.nlp.base.DocumentAssembler
71+
import com.johnsnowlabs.nlp.embeddings.MPNetEmbeddings
72+
import org.apache.spark.ml.Pipeline
73+
import org.apache.spark.sql.functions.explode
74+
import spark.implicits._
75+
76+
val documentAssembler = new DocumentAssembler()
77+
.setInputCol("text")
78+
.setOutputCol("document")
79+
80+
val mpnetEmbeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_openvino", "en")
81+
.setInputCols("document")
82+
.setOutputCol("mpnet_embeddings")
83+
84+
val pipeline = new Pipeline().setStages(Array(
85+
documentAssembler,
86+
mpnetEmbeddings
87+
))
88+
89+
val data = Seq(
90+
"William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist."
91+
).toDF("text")
92+
93+
val model = pipeline.fit(data)
94+
val result = model.transform(data)
95+
96+
result.select(explode($"mpnet_embeddings.embeddings").alias("embeddings")).show(false)
97+
98+
```
99+
</div>
100+
101+
## Results
102+
103+
```bash
104+
105+
+--------------------+
106+
| embeddings|
107+
+--------------------+
108+
|[-0.020282388, 0....|
109+
+--------------------+
110+
111+
```
112+
113+
{:.model-param}
114+
## Model Information
115+
116+
{:.table-model}
117+
|---|---|
118+
|Model Name:|all_mpnet_base_v2_openvino|
119+
|Compatibility:|Spark NLP 6.0.0+|
120+
|License:|Open Source|
121+
|Edition:|Official|
122+
|Input Labels:|[document]|
123+
|Output Labels:|[mpnet_embeddings]|
124+
|Language:|en|
125+
|Size:|406.5 MB|
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
---
2+
layout: model
3+
title: "MedEmbed base: Specialized Embedding Model for Medical and Clinical Information Retrieval (OpenVINO)"
4+
author: John Snow Labs
5+
name: bge_medembed_base_v0_1_openvino
6+
date: 2025-07-15
7+
tags: [openvino, english, medical_embedding, clinical_embedding, information_retrieval, open_source, bge, en]
8+
task: Embeddings
9+
language: en
10+
edition: Spark NLP 6.0.0
11+
spark_version: 3.0
12+
supported: true
13+
engine: openvino
14+
annotator: BGEEmbeddings
15+
article_header:
16+
type: cover
17+
use_language_switcher: "Python-Scala-Java"
18+
---
19+
20+
## Description
21+
22+
MedEmbed is a family of embedding models fine-tuned specifically for medical and clinical data, designed to enhance performance in healthcare-related natural language processing (NLP) tasks, particularly information retrieval.
23+
24+
- GitHub Repo: <https://github.com/abhinand5/MedEmbed>
25+
- Technical Blog Post: <https://huggingface.co/blog/abhinand/medembed-finetuned-embedding-models-for-medical-ir>
26+
27+
This model is intended for use in medical and clinical contexts to improve information retrieval, question answering, and semantic search tasks. It can be integrated into healthcare systems, research tools, and medical literature databases to enhance search capabilities and information access.
28+
29+
{:.btn-box}
30+
<button class="button button-orange" disabled>Live Demo</button>
31+
<button class="button button-orange" disabled>Open in Colab</button>
32+
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_medembed_base_v0_1_openvino_en_6.0.0_3.0_1752605366919.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
33+
[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_medembed_base_v0_1_openvino_en_6.0.0_3.0_1752605366919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
34+
35+
## How to use
36+
37+
38+
39+
<div class="tabs-box" markdown="1">
40+
{% include programmingLanguageSelectScalaPythonNLU.html %}
41+
```python
42+
from sparknlp.base import DocumentAssembler
43+
from sparknlp.annotator import BGEEmbeddings
44+
from pyspark.ml import Pipeline
45+
46+
document_assembler = DocumentAssembler()\
47+
.setInputCol("text")\
48+
.setOutputCol("document")
49+
50+
bge_loaded = BGEEmbeddings.pretrained("bge_medembed_base_v0_1_openvino", "en")\
51+
.setInputCols(["document"])\
52+
.setOutputCol("embeddings")
53+
54+
pipeline = Pipeline(
55+
stages = [
56+
document_assembler,
57+
bge_loaded
58+
])
59+
60+
data = spark.createDataFrame([
61+
['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist.']
62+
]).toDF("text")
63+
64+
model = pipeline.fit(data)
65+
result = model.transform(data)
66+
67+
result.selectExpr("explode(embeddings.embeddings) as embeddings").show()
68+
69+
```
70+
```scala
71+
import com.johnsnowlabs.nlp.base.DocumentAssembler
72+
import com.johnsnowlabs.nlp.embeddings.BGEEmbeddings
73+
import org.apache.spark.ml.Pipeline
74+
import org.apache.spark.sql.functions.explode
75+
import spark.implicits._
76+
77+
val documentAssembler = new DocumentAssembler()
78+
.setInputCol("text")
79+
.setOutputCol("document")
80+
81+
val bgeEmbeddings = BGEEmbeddings.pretrained("bge_medembed_base_v0_1_openvino", "en")
82+
.setInputCols("document")
83+
.setOutputCol("bge")
84+
85+
val pipeline = new Pipeline().setStages(Array(
86+
documentAssembler,
87+
bgeEmbeddings
88+
))
89+
90+
val data = Seq(
91+
"William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist."
92+
).toDF("text")
93+
94+
val model = pipeline.fit(data)
95+
val result = model.transform(data)
96+
97+
result.select(explode($"bge.embeddings").alias("embeddings")).show(false)
98+
99+
```
100+
</div>
101+
102+
## Results
103+
104+
```bash
105+
106+
+--------------------+
107+
| embeddings|
108+
+--------------------+
109+
|[-0.055220805, 0....|
110+
+--------------------+
111+
112+
```
113+
114+
{:.model-param}
115+
## Model Information
116+
117+
{:.table-model}
118+
|---|---|
119+
|Model Name:|bge_medembed_base_v0_1_openvino|
120+
|Compatibility:|Spark NLP 6.0.0+|
121+
|License:|Open Source|
122+
|Edition:|Official|
123+
|Input Labels:|[document]|
124+
|Output Labels:|[embeddings]|
125+
|Language:|en|
126+
|Size:|389.7 MB|
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
---
2+
layout: model
3+
title: "MedEmbed large: Specialized Embedding Model for Medical and Clinical Information Retrieval (OpenVINO)"
4+
author: John Snow Labs
5+
name: bge_medembed_large_v0_1_openvino
6+
date: 2025-07-15
7+
tags: [openvino, english, medical_embedding, clinical_embedding, information_retrieval, open_source, bge, en]
8+
task: Embeddings
9+
language: en
10+
edition: Spark NLP 6.0.0
11+
spark_version: 3.0
12+
supported: true
13+
engine: openvino
14+
annotator: BGEEmbeddings
15+
article_header:
16+
type: cover
17+
use_language_switcher: "Python-Scala-Java"
18+
---
19+
20+
## Description
21+
22+
MedEmbed is a family of embedding models fine-tuned specifically for medical and clinical data, designed to enhance performance in healthcare-related natural language processing (NLP) tasks, particularly information retrieval.
23+
24+
- GitHub Repo: <https://github.com/abhinand5/MedEmbed>
25+
- Technical Blog Post: <https://huggingface.co/blog/abhinand/medembed-finetuned-embedding-models-for-medical-ir>
26+
27+
This model is intended for use in medical and clinical contexts to improve information retrieval, question answering, and semantic search tasks. It can be integrated into healthcare systems, research tools, and medical literature databases to enhance search capabilities and information access.
28+
29+
{:.btn-box}
30+
<button class="button button-orange" disabled>Live Demo</button>
31+
<button class="button button-orange" disabled>Open in Colab</button>
32+
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_medembed_large_v0_1_openvino_en_6.0.0_3.0_1752608614322.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
33+
[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_medembed_large_v0_1_openvino_en_6.0.0_3.0_1752608614322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
34+
35+
## How to use
36+
37+
38+
39+
<div class="tabs-box" markdown="1">
40+
{% include programmingLanguageSelectScalaPythonNLU.html %}
41+
```python
42+
from sparknlp.base import DocumentAssembler
43+
from sparknlp.annotator import BGEEmbeddings
44+
from pyspark.ml import Pipeline
45+
46+
document_assembler = DocumentAssembler()\
47+
.setInputCol("text")\
48+
.setOutputCol("document")
49+
50+
bge_loaded = BGEEmbeddings.pretrained("bge_medembed_large_v0_1_openvino", "en")\
51+
.setInputCols(["document"])\
52+
.setOutputCol("embeddings")
53+
54+
pipeline = Pipeline(
55+
stages = [
56+
document_assembler,
57+
bge_loaded
58+
])
59+
60+
data = spark.createDataFrame([
61+
['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist.']
62+
]).toDF("text")
63+
64+
model = pipeline.fit(data)
65+
result = model.transform(data)
66+
67+
result.selectExpr("explode(embeddings.embeddings) as embeddings").show()
68+
69+
```
70+
```scala
71+
import com.johnsnowlabs.nlp.base.DocumentAssembler
72+
import com.johnsnowlabs.nlp.embeddings.BGEEmbeddings
73+
import org.apache.spark.ml.Pipeline
74+
import org.apache.spark.sql.functions.explode
75+
import spark.implicits._
76+
77+
val documentAssembler = new DocumentAssembler()
78+
.setInputCol("text")
79+
.setOutputCol("document")
80+
81+
val bgeEmbeddings = BGEEmbeddings.pretrained("bge_medembed_large_v0_1_openvino", "en")
82+
.setInputCols("document")
83+
.setOutputCol("bge")
84+
85+
val pipeline = new Pipeline().setStages(Array(
86+
documentAssembler,
87+
bgeEmbeddings
88+
))
89+
90+
val data = Seq(
91+
"William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist."
92+
).toDF("text")
93+
94+
val model = pipeline.fit(data)
95+
val result = model.transform(data)
96+
97+
result.select(explode($"bge.embeddings").alias("embeddings")).show(false)
98+
99+
```
100+
</div>
101+
102+
## Results
103+
104+
```bash
105+
106+
+--------------------+
107+
| embeddings|
108+
+--------------------+
109+
|[0.0026465012, 3....|
110+
+--------------------+
111+
112+
```
113+
114+
{:.model-param}
115+
## Model Information
116+
117+
{:.table-model}
118+
|---|---|
119+
|Model Name:|bge_medembed_large_v0_1_openvino|
120+
|Compatibility:|Spark NLP 6.0.0+|
121+
|License:|Open Source|
122+
|Edition:|Official|
123+
|Input Labels:|[document]|
124+
|Output Labels:|[embeddings]|
125+
|Language:|en|
126+
|Size:|1.2 GB|

0 commit comments

Comments
 (0)