DivyaMereddy007 committed
Commit a5dd439
1 Parent(s): c50a6d5

Add new SentenceTransformer model.

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 768,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
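In plain terms, only mean pooling is enabled above: the model averages DistilBERT's token embeddings (excluding padding) into a single 768-dimensional sentence vector. A minimal PyTorch sketch of that operation, assuming `token_embeddings` of shape (batch, seq_len, 768) and the tokenizer's 0/1 `attention_mask`:

```python
import torch

def mean_pooling(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # Broadcast the mask over the embedding dimension so padding tokens contribute nothing
    mask = attention_mask.unsqueeze(-1).float()
    summed = (token_embeddings * mask).sum(dim=1)  # sum of real token vectors per sentence
    counts = mask.sum(dim=1).clamp(min=1e-9)       # number of real tokens per sentence
    return summed / counts                         # (batch, 768) sentence embeddings
```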
README.md ADDED
@@ -0,0 +1,455 @@
+ ---
+ language: []
+ library_name: sentence-transformers
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:1746
+ - loss:CosineSimilarityLoss
+ base_model: sentence-transformers/distilbert-base-nli-mean-tokens
+ datasets: []
+ widget:
+ - source_sentence: Cheeseburger Potato Soup ["6 baking potatoes", "1 lb. of extra lean ground beef", "2/3 c. butter or margarine", "6 c. milk", "3/4 tsp. salt", "1/2 tsp. pepper", "1 1/2 c (6 oz.) shredded Cheddar cheese, divided", "12 sliced bacon, cooked, crumbled and divided", "4 green onion, chopped and divided", "1 (8 oz.) carton sour cream (optional)"] ["Wash potatoes; prick several times with a fork.", "Microwave them with a wet paper towel covering the potatoes on high for 6-8 minutes.", "The potatoes should be soft, ready to eat.", "Let them cool enough to handle.", "Cut in half lengthwise; scoop out pulp and reserve.", "Discard shells.", "Brown ground beef until done.", "Drain any grease from the meat.", "Set aside when done.", "Meat will be added later.", "Melt butter in a large kettle over low heat; add flour, stirring until smooth.", "Cook 1 minute, stirring constantly. Gradually add milk; cook over medium heat, stirring constantly, until thickened and bubbly.", "Stir in potato, ground beef, salt, pepper, 1 cup of cheese, 2 tablespoons of green onion and 1/2 cup of bacon.", "Cook until heated (do not boil).", "Stir in sour cream if desired; cook until heated (do not boil).", "Sprinkle with remaining cheese, bacon and green onions."]
+   sentences:
+   - Nolan'S Pepper Steak ["1 1/2 lb. round steak (1-inch thick), cut into strips", "1 can drained tomatoes, cut up (save liquid)", "1 3/4 c. water", "1/2 c. onions", "1 1/2 Tbsp. Worcestershire sauce", "2 green peppers, diced", "1/4 c. oil"] ["Roll steak strips in flour.", "Brown in skillet.", "Salt and pepper.", "Combine tomato liquid, water, onions and browned steak. Cover and simmer for one and a quarter hours.", "Uncover and stir in Worcestershire sauce.", "Add tomatoes, green peppers and simmer for 5 minutes.", "Serve over hot cooked rice."]
+   - Fresh Strawberry Pie ["1 baked pie shell", "1 qt. cleaned strawberries", "1 1/2 c. water", "4 Tbsp. cornstarch", "1 c. sugar", "1/8 tsp. salt", "4 Tbsp. strawberry jello"] ["Mix water, cornstarch, sugar and salt in saucepan.", "Stir constantly and boil until thick and clear.", "Remove from heat and stir in jello.", "Set aside to cool.", "But don't allow it to set. Layer strawberries in baked crust.", "Pour cooled glaze over. Continue layering berries and glaze.", "Refrigerate.", "Serve with whipped cream."]
+   - Vegetable-Burger Soup ["1/2 lb. ground beef", "2 c. water", "1 tsp. sugar", "1 pkg. Cup-a-Soup onion soup mix (dry)", "1 lb. can stewed tomatoes", "1 (8 oz.) can tomato sauce", "1 (10 oz.) pkg. frozen mixed vegetables"] ["Lightly brown beef in soup pot.", "Drain off excess fat.", "Stir in tomatoes, tomato sauce, water, frozen vegetables, soup mix and sugar.", "Bring to a boil.", "Reduce heat and simmer for 20 minutes. Serve."]
+ - source_sentence: Summer Spaghetti ["1 lb. very thin spaghetti", "1/2 bottle McCormick Salad Supreme (seasoning)", "1 bottle Zesty Italian dressing"] ["Prepare spaghetti per package.", "Drain.", "Melt a little butter through it.", "Marinate overnight in Salad Supreme and Zesty Italian dressing.", "Just before serving, add cucumbers, tomatoes, green peppers, mushrooms, olives or whatever your taste may want."]
+   sentences:
+   - Prize-Winning Meat Loaf ["1 1/2 lb. ground beef", "1 c. tomato juice", "3/4 c. oats (uncooked)", "1 egg, beaten", "1/4 c. chopped onion", "1/4 tsp. pepper", "1 1/2 tsp. salt"] ["Mix well.", "Press firmly into an 8 1/2 x 4 1/2 x 2 1/2-inch loaf pan.", "Bake in preheated moderate oven.", "Bake at 350\u00b0 for 1 hour.", "Let stand 5 minutes before slicing.", "Makes 8 servings."]
+   - Cuddy Farms Marinated Turkey ["2 c. 7-Up or Sprite", "1 c. vegetable oil", "1 c. Kikkoman soy sauce", "garlic salt"] ["Buy whole turkey breast; remove all skin and bones. Cut into pieces about the size of your hand. Pour marinade over turkey and refrigerate for at least 8 hours (up to 48 hours). The longer it marinates, the less cooking time it takes."]
+   - Pear-Lime Salad ["1 (16 oz.) can pear halves, undrained", "1 (3 oz.) pkg. lime gelatin", "1 (8 oz.) pkg. cream cheese, softened", "1 (8 oz.) carton lemon yogurt"] ["Drain pears, reserving juice.", "Bring juice to a boil, stirring constantly.", "Remove from heat.", "Add gelatin, stirring until dissolved.", "Let cool slightly.", "Coarsely chop pear halves. Combine cream cheese and yogurt; beat at medium speed of electric mixer until smooth.", "Add gelatin and beat well.", "Stir in pears.", "Pour into an oiled 4-cup mold or Pyrex dish.", "Chill."]
+ - source_sentence: Millionaire Pie ["1 large container Cool Whip", "1 large can crushed pineapple", "1 can condensed milk", "3 lemons", "1 c. pecans", "2 graham cracker crusts"] ["Empty Cool Whip into a bowl.", "Drain juice from pineapple.", "Mix Cool Whip and pineapple.", "Add condensed milk.", "Squeeze lemons, remove seeds and add to Cool Whip and pineapple.", "Chop nuts into small pieces and add to mixture.", "Stir all ingredients together and mix well.", "Pour into a graham cracker crust.", "Use top from crust to cover top of pie.", "Chill overnight.", "Makes 2 pies."]
+   sentences:
+   - Jewell Ball'S Chicken ["1 small jar chipped beef, cut up", "4 boned chicken breasts", "1 can cream of mushroom soup", "1 carton sour cream"] ["Place chipped beef on bottom of baking dish.", "Place chicken on top of beef.", "Mix soup and cream together; pour over chicken. Bake, uncovered, at 275\u00b0 for 3 hours."]
+   - Quick Peppermint Puffs ["8 marshmallows", "2 Tbsp. margarine, melted", "1/4 c. crushed peppermint candy", "1 can crescent rolls"] ["Dip marshmallows in melted margarine; roll in candy. Wrap a crescent triangle around each marshmallow, completely covering the marshmallow and square edges of dough tightly to seal.", "Dip in margarine and place in a greased muffin tin.", "Bake at 375\u00b0 for 10 to 15 minutes; remove from pan."]
+   - Double Cherry Delight ["1 (17 oz.) can dark sweet pitted cherries", "1/2 c. ginger ale", "1 (6 oz.) pkg. Jell-O cherry flavor gelatin", "2 c. boiling water", "1/8 tsp. almond extract", "1 c. miniature marshmallows"] ["Drain cherries, measuring syrup.", "Cut cherries in half.", "Add ginger ale and enough water to syrup to make 1 1/2 cups.", "Dissolve gelatin in boiling water.", "Add measured liquid and almond extract. Chill until very thick.", "Fold in marshmallows and the cherries. Spoon into 6-cup mold.", "Chill until firm, at least 4 hours or overnight.", "Unmold.", "Makes about 5 1/3 cups."]
+ - source_sentence: Prize-Winning Meat Loaf ["1 1/2 lb. ground beef", "1 c. tomato juice", "3/4 c. oats (uncooked)", "1 egg, beaten", "1/4 c. chopped onion", "1/4 tsp. pepper", "1 1/2 tsp. salt"] ["Mix well.", "Press firmly into an 8 1/2 x 4 1/2 x 2 1/2-inch loaf pan.", "Bake in preheated moderate oven.", "Bake at 350\u00b0 for 1 hour.", "Let stand 5 minutes before slicing.", "Makes 8 servings."]
+   sentences:
+   - Beer Bread ["3 c. self rising flour", "1 - 12 oz. can beer", "1 Tbsp. sugar"] ["Stir the ingredients together and put in a greased and floured loaf pan.", "Bake at 425 degrees for 50 minutes.", "Drizzle melted butter on top."]
+   - Artichoke Dip ["2 cans or jars artichoke hearts", "1 c. mayonnaise", "1 c. Parmesan cheese"] ["Drain artichokes and chop.", "Mix with mayonnaise and Parmesan cheese.", "After well mixed, bake, uncovered, for 20 to 30 minutes at 350\u00b0.", "Serve with crackers."]
+   - 'One Hour Rolls ["1 c. milk", "2 Tbsp. sugar", "1 pkg. dry yeast", "1 Tbsp. salt", "3 Tbsp. Crisco oil", "2 c. plain flour"] ["Put flour into a large mixing bowl.", "Combine sugar, milk, salt and oil in a saucepan and heat to boiling; remove from heat and let cool to lukewarm.", "Add yeast and mix well.", "Pour into flour and stir.", "Batter will be sticky.", "Roll out batter on a floured board and cut with biscuit cutter.", "Lightly brush tops with melted oleo and fold over.", "Place rolls on a cookie sheet, put in a warm place and let rise for 1 hour.", "Bake at 350\u00b0 for about 20 minutes. Yield: 2 1/2 dozen."]'
+ - source_sentence: Watermelon Rind Pickles ["7 lb. watermelon rind", "7 c. sugar", "2 c. apple vinegar", "1/2 tsp. oil of cloves", "1/2 tsp. oil of cinnamon"] ["Trim off green and pink parts of watermelon rind; cut to 1-inch cubes.", "Parboil until tender, but not soft.", "Drain. Combine sugar, vinegar, oil of cloves and oil of cinnamon; bring to boiling and pour over rind.", "Let stand overnight.", "In the morning, drain off syrup.", "Heat and put over rind.", "The third morning, heat rind and syrup; seal in hot, sterilized jars.", "Makes 8 pints.", "(Oil of cinnamon and clove keeps rind clear and transparent.)"]
+   sentences:
+   - Summer Chicken ["1 pkg. chicken cutlets", "1/2 c. oil", "1/3 c. red vinegar", "2 Tbsp. oregano", "2 Tbsp. garlic salt"] ["Double recipe for more chicken."]
+   - Summer Spaghetti ["1 lb. very thin spaghetti", "1/2 bottle McCormick Salad Supreme (seasoning)", "1 bottle Zesty Italian dressing"] ["Prepare spaghetti per package.", "Drain.", "Melt a little butter through it.", "Marinate overnight in Salad Supreme and Zesty Italian dressing.", "Just before serving, add cucumbers, tomatoes, green peppers, mushrooms, olives or whatever your taste may want."]
+   - Chicken Funny ["1 large whole chicken", "2 (10 1/2 oz.) cans chicken gravy", "1 (10 1/2 oz.) can cream of mushroom soup", "1 (6 oz.) box Stove Top stuffing", "4 oz. shredded cheese"] ["Boil and debone chicken.", "Put bite size pieces in average size square casserole dish.", "Pour gravy and cream of mushroom soup over chicken; level.", "Make stuffing according to instructions on box (do not make too moist).", "Put stuffing on top of chicken and gravy; level.", "Sprinkle shredded cheese on top and bake at 350\u00b0 for approximately 20 minutes or until golden and bubbly."]
+ pipeline_tag: sentence-similarity
+ ---
+
+ # SentenceTransformer based on sentence-transformers/distilbert-base-nli-mean-tokens
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/distilbert-base-nli-mean-tokens](https://huggingface.co/sentence-transformers/distilbert-base-nli-mean-tokens). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [sentence-transformers/distilbert-base-nli-mean-tokens](https://huggingface.co/sentence-transformers/distilbert-base-nli-mean-tokens) <!-- at revision 2781c006adbf3726b509caa8649fc8077ff0724d -->
+ - **Maximum Sequence Length:** 128 tokens
+ - **Output Dimensionality:** 768 dimensions
+ - **Similarity Function:** Cosine Similarity
+ <!-- - **Training Dataset:** Unknown -->
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: DistilBertModel
+   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+ )
+ ```
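+
+ The same two-module pipeline can be assembled by hand, which makes the architecture dump above concrete. A minimal sketch using the library's `models` API (an equivalent construction, not the exact script used to train this model):
+
+ ```python
+ from sentence_transformers import SentenceTransformer, models
+
+ # Module 0: DistilBERT backbone emitting one embedding per token, truncated at 128 tokens
+ word_embedding = models.Transformer(
+     "sentence-transformers/distilbert-base-nli-mean-tokens", max_seq_length=128
+ )
+ # Module 1: mask-aware mean pooling of the token embeddings into one 768-dim vector
+ pooling = models.Pooling(word_embedding.get_word_embedding_dimension(), pooling_mode="mean")
+
+ model = SentenceTransformer(modules=[word_embedding, pooling])
+ ```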
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("DivyaMereddy007/RecipeBert_v5original_epoc50Copy_of_TrainSetenceTransforme-Finetuning_v5_DistilledBert")
+ # Run inference
+ sentences = [
+     'Watermelon Rind Pickles ["7 lb. watermelon rind", "7 c. sugar", "2 c. apple vinegar", "1/2 tsp. oil of cloves", "1/2 tsp. oil of cinnamon"] ["Trim off green and pink parts of watermelon rind; cut to 1-inch cubes.", "Parboil until tender, but not soft.", "Drain. Combine sugar, vinegar, oil of cloves and oil of cinnamon; bring to boiling and pour over rind.", "Let stand overnight.", "In the morning, drain off syrup.", "Heat and put over rind.", "The third morning, heat rind and syrup; seal in hot, sterilized jars.", "Makes 8 pints.", "(Oil of cinnamon and clove keeps rind clear and transparent.)"]',
+     'Summer Chicken ["1 pkg. chicken cutlets", "1/2 c. oil", "1/3 c. red vinegar", "2 Tbsp. oregano", "2 Tbsp. garlic salt"] ["Double recipe for more chicken."]',
+     'Summer Spaghetti ["1 lb. very thin spaghetti", "1/2 bottle McCormick Salad Supreme (seasoning)", "1 bottle Zesty Italian dressing"] ["Prepare spaghetti per package.", "Drain.", "Melt a little butter through it.", "Marinate overnight in Salad Supreme and Zesty Italian dressing.", "Just before serving, add cucumbers, tomatoes, green peppers, mushrooms, olives or whatever your taste may want."]',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 768]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
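+
+ The `similarities` matrix above is enough for simple paraphrase mining over a recipe collection. A small continuation sketch, reusing `sentences` and `similarities` from the block above (in Sentence Transformers 3.x, `model.similarity` returns a `torch.Tensor`):
+
+ ```python
+ sim = similarities.clone()
+ sim.fill_diagonal_(-1.0)  # ignore each sentence's similarity to itself
+ i, j = divmod(int(sim.argmax()), sim.size(1))
+ print(f"Most similar pair (score {sim[i, j]:.3f}):")
+ print(sentences[i][:60], "<->", sentences[j][:60])
+ ```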
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 1,746 training samples
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | sentence_0 | sentence_1 | label |
+   |:--------|:-----------|:-----------|:------|
+   | type    | string     | string     | float |
+   | details | <ul><li>min: 63 tokens</li><li>mean: 118.82 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 63 tokens</li><li>mean: 118.59 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.19</li><li>max: 1.0</li></ul> |
+ * Samples:
+   | sentence_0 | sentence_1 | label |
+   |:-----------|:-----------|:------|
+   | <code>Tuna Macaroni Casserole ["1 box macaroni and cheese", "1 can tuna, drained", "1 small jar pimentos", "1 medium onion, chopped"] ["Prepare macaroni and cheese as directed.", "Add drained tuna, pimento and onion.", "Mix.", "Serve hot or cold."]</code> | <code>Easy Fudge ["1 (14 oz.) can sweetened condensed milk", "1 (12 oz.) pkg. semi-sweet chocolate chips", "1 (1 oz.) sq. unsweetened chocolate (if desired)", "1 1/2 c. chopped nuts (if desired)", "1 tsp. vanilla"] ["Butter a square pan, 8 x 8 x 2-inches.", "Heat milk, chocolate chips and unsweetened chocolate over low heat, stirring constantly, until chocolate is melted and mixture is smooth. Remove from heat.", "Stir in nuts and vanilla.", "Spread in pan."]</code> | <code>0.05</code> |
+   | <code>Scalloped Corn ["1 can cream-style corn", "1 can whole kernel corn", "1/2 pkg. (approximately 20) saltine crackers, crushed", "1 egg, beaten", "6 tsp. butter, divided", "pepper to taste"] ["Mix together both cans of corn, crackers, egg, 2 teaspoons of melted butter and pepper and place in a buttered baking dish.", "Dot with remaining 4 teaspoons of butter.", "Bake at 350\u00b0 for 1 hour."]</code> | <code>Quick Peppermint Puffs ["8 marshmallows", "2 Tbsp. margarine, melted", "1/4 c. crushed peppermint candy", "1 can crescent rolls"] ["Dip marshmallows in melted margarine; roll in candy. Wrap a crescent triangle around each marshmallow, completely covering the marshmallow and square edges of dough tightly to seal.", "Dip in margarine and place in a greased muffin tin.", "Bake at 375\u00b0 for 10 to 15 minutes; remove from pan."]</code> | <code>0.1</code> |
+   | <code>Beer Bread ["3 c. self rising flour", "1 - 12 oz. can beer", "1 Tbsp. sugar"] ["Stir the ingredients together and put in a greased and floured loaf pan.", "Bake at 425 degrees for 50 minutes.", "Drizzle melted butter on top."]</code> | <code>Rhubarb Coffee Cake ["1 1/2 c. sugar", "1/2 c. butter", "1 egg", "1 c. buttermilk", "2 c. flour", "1/2 tsp. salt", "1 tsp. soda", "1 c. buttermilk", "2 c. rhubarb, finely cut", "1 tsp. vanilla"] ["Cream sugar and butter.", "Add egg and beat well.", "To creamed butter, sugar and egg, add alternately buttermilk with mixture of flour, salt and soda.", "Mix well.", "Add rhubarb and vanilla.", "Pour into greased 9 x 13-inch pan and add Topping."]</code> | <code>0.4</code> |
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
+   ```json
+   {
+       "loss_fct": "torch.nn.modules.loss.MSELoss"
+   }
+   ```
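+
+ In other words, each pair is embedded, the cosine similarity of the two vectors is computed, and the `MSELoss` above is applied against the 0-to-1 label. A minimal fine-tuning sketch under the hyperparameters reported below; the single pair shown is illustrative (taken from the samples table), standing in for the full 1,746-pair set:
+
+ ```python
+ from torch.utils.data import DataLoader
+ from sentence_transformers import InputExample, SentenceTransformer, losses
+
+ model = SentenceTransformer("sentence-transformers/distilbert-base-nli-mean-tokens")
+
+ train_examples = [
+     InputExample(texts=["Beer Bread ...", "Rhubarb Coffee Cake ..."], label=0.4),
+ ]
+ train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
+
+ # CosineSimilarityLoss: MSE between cosine(embed(sentence_0), embed(sentence_1)) and the label
+ train_loss = losses.CosineSimilarityLoss(model)
+ model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=50)
+ ```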
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `per_device_train_batch_size`: 16
+ - `per_device_eval_batch_size`: 16
+ - `num_train_epochs`: 50
+ - `multi_dataset_batch_sampler`: round_robin
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: no
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 16
+ - `per_device_eval_batch_size`: 16
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `learning_rate`: 5e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1
+ - `num_train_epochs`: 50
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.0
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: False
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: False
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: False
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`: 
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `dispatch_batches`: None
+ - `split_batches`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `batch_sampler`: batch_sampler
+ - `multi_dataset_batch_sampler`: round_robin
+
+ </details>
+
+ ### Training Logs
+ | Epoch   | Step | Training Loss |
+ |:-------:|:----:|:-------------:|
+ | 4.5455  | 500  | 0.0092        |
+ | 9.0909  | 1000 | 0.0091        |
+ | 13.6364 | 1500 | 0.0081        |
+ | 18.1818 | 2000 | 0.0074        |
+ | 22.7273 | 2500 | 0.0071        |
+ | 27.2727 | 3000 | 0.0069        |
+ | 31.8182 | 3500 | 0.0066        |
+ | 36.3636 | 4000 | 0.0065        |
+ | 40.9091 | 4500 | 0.0061        |
+ | 45.4545 | 5000 | 0.006         |
+ | 50.0    | 5500 | 0.0056        |
+
+ ### Framework Versions
+ - Python: 3.10.12
+ - Sentence Transformers: 3.0.1
+ - Transformers: 4.41.2
+ - PyTorch: 2.3.0+cu121
+ - Accelerate: 0.31.0
+ - Datasets: 2.19.2
+ - Tokenizers: 0.19.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "_name_or_path": "sentence-transformers/distilbert-base-nli-mean-tokens",
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertModel"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "initializer_range": 0.02,
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.2",
+   "vocab_size": 30522
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "3.0.1",
+     "transformers": "4.41.2",
+     "pytorch": "2.3.0+cu121"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": null
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a6684cddab498c707b62f83092ed9260c4f814ef054aee7310013c4a7be0af49
+ size 265462608
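The three lines above are a Git LFS pointer, not the weights themselves: the actual `model.safetensors` blob (about 265 MB, i.e. DistilBERT's roughly 66M float32 parameters) is fetched from LFS storage when the repository is cloned or downloaded. Once the file is local, the parameter count can be sanity-checked; a sketch assuming the `safetensors` package:

```python
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    n_params = sum(f.get_tensor(key).numel() for key in f.keys())
print(f"{n_params:,} parameters")  # roughly 66M, consistent with a 265 MB float32 file
```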
modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   }
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 128,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "cls_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 128,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
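Two settings above matter most in practice: `"do_lower_case": true` (inputs are lowercased by the tokenizer) and `"model_max_length": 128` (anything past 128 WordPiece tokens is cut off, so only roughly the first hundred words of a long recipe are embedded). A quick way to inspect what the model actually sees; a sketch assuming the `transformers` package:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "DivyaMereddy007/RecipeBert_v5original_epoc50Copy_of_TrainSetenceTransforme-Finetuning_v5_DistilledBert"
)
ids = tok("Beer Bread: stir, pour, bake at 425 degrees.", truncation=True, max_length=128)["input_ids"]
print(len(ids), tok.decode(ids))  # token count and the lowercased text the model embeds
```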
vocab.txt ADDED
The diff for this file is too large to render. See raw diff