DivyaMereddy007 committed
Commit df43f9e
1 parent: 3625d4d

Add new SentenceTransformer model.
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
README.md ADDED
@@ -0,0 +1,454 @@
---
language: []
library_name: sentence-transformers
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:1746
- loss:CosineSimilarityLoss
base_model: sentence-transformers/distilbert-base-nli-mean-tokens
datasets: []
widget:
- source_sentence: Scalloped Corn ["1 can cream-style corn", "1 can whole kernel corn",
    "1/2 pkg. (approximately 20) saltine crackers, crushed", "1 egg, beaten", "6 tsp.
    butter, divided", "pepper to taste"] ["Mix together both cans of corn, crackers,
    egg, 2 teaspoons of melted butter and pepper and place in a buttered baking dish.",
    "Dot with remaining 4 teaspoons of butter.", "Bake at 350\u00b0 for 1 hour."]
  sentences:
  - Artichoke Dip ["2 cans or jars artichoke hearts", "1 c. mayonnaise", "1 c. Parmesan
    cheese"] ["Drain artichokes and chop.", "Mix with mayonnaise and Parmesan cheese.",
    "After well mixed, bake, uncovered, for 20 to 30 minutes at 350\u00b0.", "Serve
    with crackers."]
  - Scalloped Corn ["1 can cream-style corn", "1 can whole kernel corn", "1/2 pkg.
    (approximately 20) saltine crackers, crushed", "1 egg, beaten", "6 tsp. butter,
    divided", "pepper to taste"] ["Mix together both cans of corn, crackers, egg,
    2 teaspoons of melted butter and pepper and place in a buttered baking dish.",
    "Dot with remaining 4 teaspoons of butter.", "Bake at 350\u00b0 for 1 hour."]
  - Chicken Stew ["3 lb. chicken, boiled", "4 medium potatoes, diced", "2 medium onions,
    chopped", "1 (16 oz.) can creamed corn", "1 (16 oz.) can English peas", "1 (16
    oz.) can field peas", "1 (16 oz.) can butter beans", "1 (16 oz.) can tomatoes",
    "1 (46 oz.) can tomato juice", "1 small box macaroni", "1 Tbsp. black pepper",
    "1 Tbsp. salt", "1 Tbsp. sugar"] ["Remove chicken from bone.", "Use the broth.",
    "Mix the vegetables and macaroni.", "Add sugar, salt and black pepper.", "Cook
    until all vegetables are tender over medium heat."]
- source_sentence: Watermelon Rind Pickles ["7 lb. watermelon rind", "7 c. sugar",
    "2 c. apple vinegar", "1/2 tsp. oil of cloves", "1/2 tsp. oil of cinnamon"] ["Trim
    off green and pink parts of watermelon rind; cut to 1-inch cubes.", "Parboil until
    tender, but not soft.", "Drain. Combine sugar, vinegar, oil of cloves and oil
    of cinnamon; bring to boiling and pour over rind.", "Let stand overnight.", "In
    the morning, drain off syrup.", "Heat and put over rind.", "The third morning,
    heat rind and syrup; seal in hot, sterilized jars.", "Makes 8 pints.", "(Oil of
    cinnamon and clove keeps rind clear and transparent.)"]
  sentences:
  - Cheeseburger Potato Soup ["6 baking potatoes", "1 lb. of extra lean ground beef",
    "2/3 c. butter or margarine", "6 c. milk", "3/4 tsp. salt", "1/2 tsp. pepper",
    "1 1/2 c (6 oz.) shredded Cheddar cheese, divided", "12 sliced bacon, cooked,
    crumbled and divided", "4 green onion, chopped and divided", "1 (8 oz.) carton
    sour cream (optional)"] ["Wash potatoes; prick several times with a fork.", "Microwave
    them with a wet paper towel covering the potatoes on high for 6-8 minutes.", "The
    potatoes should be soft, ready to eat.", "Let them cool enough to handle.", "Cut
    in half lengthwise; scoop out pulp and reserve.", "Discard shells.", "Brown ground
    beef until done.", "Drain any grease from the meat.", "Set aside when done.",
    "Meat will be added later.", "Melt butter in a large kettle over low heat; add
    flour, stirring until smooth.", "Cook 1 minute, stirring constantly. Gradually
    add milk; cook over medium heat, stirring constantly, until thickened and bubbly.",
    "Stir in potato, ground beef, salt, pepper, 1 cup of cheese, 2 tablespoons of
    green onion and 1/2 cup of bacon.", "Cook until heated (do not boil).", "Stir
    in sour cream if desired; cook until heated (do not boil).", "Sprinkle with remaining
    cheese, bacon and green onions."]
  - Easy Fudge ["1 (14 oz.) can sweetened condensed milk", "1 (12 oz.) pkg. semi-sweet
    chocolate chips", "1 (1 oz.) sq. unsweetened chocolate (if desired)", "1 1/2 c.
    chopped nuts (if desired)", "1 tsp. vanilla"] ["Butter a square pan, 8 x 8 x 2-inches.",
    "Heat milk, chocolate chips and unsweetened chocolate over low heat, stirring
    constantly, until chocolate is melted and mixture is smooth. Remove from heat.",
    "Stir in nuts and vanilla.", "Spread in pan."]
  - Chicken Ole ["4 chicken breasts, cooked", "1 can cream of chicken soup", "1 can
    cream of mushroom soup", "1 can green chili salsa sauce", "1 can green chilies",
    "1 c. milk", "1 grated onion", "1 pkg. corn tortilla in pieces"] ["Dice chicken.",
    "Mix all ingredients together.", "Let sit overnight.", "Bake 1 1/2 hours at 375\u00b0."]
- source_sentence: Quick Barbecue Wings ["chicken wings (as many as you need for dinner)",
    "flour", "barbecue sauce (your choice)"] ["Clean wings.", "Flour and fry until
    done.", "Place fried chicken wings in microwave bowl.", "Stir in barbecue sauce.",
    "Microwave on High (stir once) for 4 minutes."]
  sentences:
  - Creamy Corn ["2 (16 oz.) pkg. frozen corn", "1 (8 oz.) pkg. cream cheese, cubed",
    "1/3 c. butter, cubed", "1/2 tsp. garlic powder", "1/2 tsp. salt", "1/4 tsp. pepper"]
    ["In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours
    or until heated through and cheese is melted. Stir well before serving. Yields
    6 servings."]
  - Broccoli Salad ["1 large head broccoli (about 1 1/2 lb.)", "10 slices bacon, cooked
    and crumbled", "5 green onions, sliced or 1/4 c. chopped red onion", "1/2 c. raisins",
    "1 c. mayonnaise", "2 Tbsp. vinegar", "1/4 c. sugar"] ["Trim off large leaves
    of broccoli and remove the tough ends of lower stalks. Wash the broccoli thoroughly.
    Cut the florets and stems into bite-size pieces. Place in a large bowl. Add bacon,
    onions and raisins. Combine remaining ingredients, stirring well. Add dressing
    to broccoli mixture and toss gently. Cover and refrigerate 2 to 3 hours. Makes
    about 6 servings."]
  - Vegetable-Burger Soup ["1/2 lb. ground beef", "2 c. water", "1 tsp. sugar", "1
    pkg. Cup-a-Soup onion soup mix (dry)", "1 lb. can stewed tomatoes", "1 (8 oz.)
    can tomato sauce", "1 (10 oz.) pkg. frozen mixed vegetables"] ["Lightly brown
    beef in soup pot.", "Drain off excess fat.", "Stir in tomatoes, tomato sauce,
    water, frozen vegetables, soup mix and sugar.", "Bring to a boil.", "Reduce heat
    and simmer for 20 minutes. Serve."]
- source_sentence: 'Eggless Milkless Applesauce Cake ["3/4 c. sugar", "1/2 c. shortening",
    "1 1/2 c. applesauce", "3 level tsp. soda", "1 tsp. each: cinnamon, cloves and
    nutmeg", "2 c. sifted flour", "1 c. raisins", "1 c. nuts"] ["Mix Crisco with applesauce,
    nuts and raisins.", "Sift dry ingredients and add.", "Mix well.", "Put in a greased
    and floured loaf pan or tube pan.", "Bake in loaf pan at 350\u00b0 to 375\u00b0
    for 45 to 60 minutes, layer pan at 375\u00b0 for 20 minutes or tube pan at 325\u00b0
    for 1 hour."]'
  sentences:
  - Broccoli Dip For Crackers ["16 oz. sour cream", "1 pkg. dry vegetable soup mix",
    "10 oz. pkg. frozen chopped broccoli, thawed and drained", "4 to 6 oz. Cheddar
    cheese, grated"] ["Mix together sour cream, soup mix, broccoli and half of cheese.",
    "Sprinkle remaining cheese on top.", "Bake at 350\u00b0 for 30 minutes, uncovered.",
    "Serve hot with vegetable crackers."]
  - Potato And Cheese Pie ["3 eggs", "1 tsp. salt", "1/4 tsp. pepper", "2 c. half
    and half", "3 c. potatoes, shredded coarse", "1 c. Cheddar cheese, coarsely shredded",
    "1/3 c. green onions"] ["Beat eggs, salt and pepper until well blended.", "Stir
    in half and half, potatoes and onions.", "Pour into well-greased 8-inch baking
    dish.", "Bake in a 400\u00b0 oven for 35 to 40 minutes, or until knife inserted
    in center comes out clean and potatoes are tender. Cool on rack 5 minutes; cut
    into squares.", "Makes 4 large servings."]
  - Angel Biscuits ["5 c. flour", "3 Tbsp. sugar", "4 tsp. baking powder", "1 1/2
    pkg. dry yeast", "2 c. buttermilk", "1 tsp. soda", "1 1/2 sticks margarine", "1/2
    c. warm water"] ["Mix flour, sugar, baking powder, soda and salt together.", "Cut
    in margarine, dissolve yeast in warm water.", "Stir into buttermilk and add to
    dry mixture.", "Cover and chill."]
- source_sentence: Rhubarb Coffee Cake ["1 1/2 c. sugar", "1/2 c. butter", "1 egg",
    "1 c. buttermilk", "2 c. flour", "1/2 tsp. salt", "1 tsp. soda", "1 c. buttermilk",
    "2 c. rhubarb, finely cut", "1 tsp. vanilla"] ["Cream sugar and butter.", "Add
    egg and beat well.", "To creamed butter, sugar and egg, add alternately buttermilk
    with mixture of flour, salt and soda.", "Mix well.", "Add rhubarb and vanilla.",
    "Pour into greased 9 x 13-inch pan and add Topping."]
  sentences:
  - Prize-Winning Meat Loaf ["1 1/2 lb. ground beef", "1 c. tomato juice", "3/4 c.
    oats (uncooked)", "1 egg, beaten", "1/4 c. chopped onion", "1/4 tsp. pepper",
    "1 1/2 tsp. salt"] ["Mix well.", "Press firmly into an 8 1/2 x 4 1/2 x 2 1/2-inch
    loaf pan.", "Bake in preheated moderate oven.", "Bake at 350\u00b0 for 1 hour.",
    "Let stand 5 minutes before slicing.", "Makes 8 servings."]
  - Angel Biscuits ["5 c. flour", "3 Tbsp. sugar", "4 tsp. baking powder", "1 1/2
    pkg. dry yeast", "2 c. buttermilk", "1 tsp. soda", "1 1/2 sticks margarine", "1/2
    c. warm water"] ["Mix flour, sugar, baking powder, soda and salt together.", "Cut
    in margarine, dissolve yeast in warm water.", "Stir into buttermilk and add to
    dry mixture.", "Cover and chill."]
  - 'Smothered Round Steak(Servings: 4) ["2 lb. round steak", "1/2 tsp. ground black
    pepper", "1 tsp. ground white pepper", "1/2 c. vegetable oil", "2 bell peppers,
    chopped", "1 c. beef stock or water", "2 tsp. salt", "1 tsp. ground red pepper",
    "all-purpose flour (dredging)", "3 medium onions, chopped", "1 celery rib, chopped"]
    ["Alex Patout says, \"Smothering is a multipurpose Cajun technique that works
    wonders with everything from game to snap beans.", "It''s similar to what the
    rest of the world knows as braising.", "The ingredients are briefly browned or
    sauteed, then cooked with a little liquid over a low heat for a long time.\""]'
pipeline_tag: sentence-similarity
---

# SentenceTransformer based on sentence-transformers/distilbert-base-nli-mean-tokens

This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [sentence-transformers/distilbert-base-nli-mean-tokens](https://huggingface.co/sentence-transformers/distilbert-base-nli-mean-tokens). It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [sentence-transformers/distilbert-base-nli-mean-tokens](https://huggingface.co/sentence-transformers/distilbert-base-nli-mean-tokens) <!-- at revision 2781c006adbf3726b509caa8649fc8077ff0724d -->
- **Maximum Sequence Length:** 128 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: DistilBertModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```
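
The Pooling module averages DistilBERT's token embeddings, ignoring padding positions. For readers who prefer plain 🤗 Transformers over the Sentence Transformers API, the following is a minimal sketch of that pooling step; it assumes the checkpoint loads through `AutoTokenizer`/`AutoModel` (the usual layout for these repositories) and is meant purely as an illustration.

```python
# Illustrative sketch: manual mean pooling with plain transformers.
# Assumes this repository's weights and tokenizer load via AutoModel/AutoTokenizer.
import torch
from transformers import AutoModel, AutoTokenizer

repo = "DivyaMereddy007/RecipeBert_v5original_epoc50_Copy_of_TrainSetenceTransforme-Finetuning_v5_DistilledBert"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModel.from_pretrained(repo)

texts = ["Scalloped Corn ...", "Artichoke Dip ..."]  # placeholder recipe strings
encoded = tokenizer(texts, padding=True, truncation=True, max_length=128, return_tensors="pt")

with torch.no_grad():
    token_embeddings = model(**encoded).last_hidden_state          # (batch, seq_len, 768)

# Mean pooling: average token vectors, masking out padding tokens.
mask = encoded["attention_mask"].unsqueeze(-1).float()              # (batch, seq_len, 1)
embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
print(embeddings.shape)  # torch.Size([2, 768])
```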

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("DivyaMereddy007/RecipeBert_v5original_epoc50_Copy_of_TrainSetenceTransforme-Finetuning_v5_DistilledBert")
# Run inference
sentences = [
    'Rhubarb Coffee Cake ["1 1/2 c. sugar", "1/2 c. butter", "1 egg", "1 c. buttermilk", "2 c. flour", "1/2 tsp. salt", "1 tsp. soda", "1 c. buttermilk", "2 c. rhubarb, finely cut", "1 tsp. vanilla"] ["Cream sugar and butter.", "Add egg and beat well.", "To creamed butter, sugar and egg, add alternately buttermilk with mixture of flour, salt and soda.", "Mix well.", "Add rhubarb and vanilla.", "Pour into greased 9 x 13-inch pan and add Topping."]',
    'Prize-Winning Meat Loaf ["1 1/2 lb. ground beef", "1 c. tomato juice", "3/4 c. oats (uncooked)", "1 egg, beaten", "1/4 c. chopped onion", "1/4 tsp. pepper", "1 1/2 tsp. salt"] ["Mix well.", "Press firmly into an 8 1/2 x 4 1/2 x 2 1/2-inch loaf pan.", "Bake in preheated moderate oven.", "Bake at 350\\u00b0 for 1 hour.", "Let stand 5 minutes before slicing.", "Makes 8 servings."]',
    'Angel Biscuits ["5 c. flour", "3 Tbsp. sugar", "4 tsp. baking powder", "1 1/2 pkg. dry yeast", "2 c. buttermilk", "1 tsp. soda", "1 1/2 sticks margarine", "1/2 c. warm water"] ["Mix flour, sugar, baking powder, soda and salt together.", "Cut in margarine, dissolve yeast in warm water.", "Stir into buttermilk and add to dry mixture.", "Cover and chill."]',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
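
Because every recipe lands in the same 768-dimensional space, ranking a small corpus against a free-text query is a single similarity lookup. The snippet below extends the example above as an illustration; the query string and the `k` value are arbitrary placeholders, not part of the original card.

```python
import torch

# Reuses `model` and `sentences` from the example above as a tiny corpus.
query = "a simple buttermilk biscuit recipe"  # placeholder query
query_embedding = model.encode([query])
corpus_embeddings = model.encode(sentences)

# model.similarity returns a (num_queries, num_corpus) tensor of cosine scores.
scores = model.similarity(query_embedding, corpus_embeddings)[0]
top = torch.topk(scores, k=2)
for score, idx in zip(top.values, top.indices):
    print(f"{float(score):.3f}  {sentences[int(idx)][:60]}...")
```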

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 1,746 training samples
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence_0 | sentence_1 | label |
  |:--------|:-----------|:-----------|:------|
  | type | string | string | float |
  | details | <ul><li>min: 63 tokens</li><li>mean: 119.05 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 63 tokens</li><li>mean: 118.49 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.19</li><li>max: 1.0</li></ul> |
* Samples:
  | sentence_0 | sentence_1 | label |
  |:-----------|:-----------|:------|
  | <code>Strawberry Whatever ["1 lb. frozen strawberries in juice", "1 small can crushed pineapple", "3 ripe bananas", "1 c. chopped pecans", "1 large pkg. strawberry Jell-O", "1 1/2 c. boiling water", "1 pt. sour cream"] ["Mix Jell-O in boiling water.", "Add strawberries, pineapple, crushed bananas and nuts.", "Spread 1/2 mixture in 13 x 6 1/2-inch pan.", "Allow to gel in freezer 30 minutes.", "Add layer of sour cream, then remaining mixture on top.", "Gel and serve."]</code> | <code>One Hour Rolls ["1 c. milk", "2 Tbsp. sugar", "1 pkg. dry yeast", "1 Tbsp. salt", "3 Tbsp. Crisco oil", "2 c. plain flour"] ["Put flour into a large mixing bowl.", "Combine sugar, milk, salt and oil in a saucepan and heat to boiling; remove from heat and let cool to lukewarm.", "Add yeast and mix well.", "Pour into flour and stir.", "Batter will be sticky.", "Roll out batter on a floured board and cut with biscuit cutter.", "Lightly brush tops with melted oleo and fold over.", "Place rolls on a cookie sheet, put in a warm place and let rise for 1 hour.", "Bake at 350\u00b0 for about 20 minutes. Yield: 2 1/2 dozen."]</code> | <code>0.1</code> |
  | <code>Broccoli Dip For Crackers ["16 oz. sour cream", "1 pkg. dry vegetable soup mix", "10 oz. pkg. frozen chopped broccoli, thawed and drained", "4 to 6 oz. Cheddar cheese, grated"] ["Mix together sour cream, soup mix, broccoli and half of cheese.", "Sprinkle remaining cheese on top.", "Bake at 350\u00b0 for 30 minutes, uncovered.", "Serve hot with vegetable crackers."]</code> | <code>Vegetable-Burger Soup ["1/2 lb. ground beef", "2 c. water", "1 tsp. sugar", "1 pkg. Cup-a-Soup onion soup mix (dry)", "1 lb. can stewed tomatoes", "1 (8 oz.) can tomato sauce", "1 (10 oz.) pkg. frozen mixed vegetables"] ["Lightly brown beef in soup pot.", "Drain off excess fat.", "Stir in tomatoes, tomato sauce, water, frozen vegetables, soup mix and sugar.", "Bring to a boil.", "Reduce heat and simmer for 20 minutes. Serve."]</code> | <code>0.4</code> |
  | <code>Summer Spaghetti ["1 lb. very thin spaghetti", "1/2 bottle McCormick Salad Supreme (seasoning)", "1 bottle Zesty Italian dressing"] ["Prepare spaghetti per package.", "Drain.", "Melt a little butter through it.", "Marinate overnight in Salad Supreme and Zesty Italian dressing.", "Just before serving, add cucumbers, tomatoes, green peppers, mushrooms, olives or whatever your taste may want."]</code> | <code>Chicken Funny ["1 large whole chicken", "2 (10 1/2 oz.) cans chicken gravy", "1 (10 1/2 oz.) can cream of mushroom soup", "1 (6 oz.) box Stove Top stuffing", "4 oz. shredded cheese"] ["Boil and debone chicken.", "Put bite size pieces in average size square casserole dish.", "Pour gravy and cream of mushroom soup over chicken; level.", "Make stuffing according to instructions on box (do not make too moist).", "Put stuffing on top of chicken and gravy; level.", "Sprinkle shredded cheese on top and bake at 350\u00b0 for approximately 20 minutes or until golden and bubbly."]</code> | <code>0.3</code> |
* Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
  ```json
  {
      "loss_fct": "torch.nn.modules.loss.MSELoss"
  }
  ```
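
For orientation, here is a minimal sketch of how a fine-tuning run with this loss and the non-default hyperparameters listed in the next section could be wired up in Sentence Transformers 3.x. The three training pairs and the output directory are placeholders, not the actual dataset.

```python
# Illustrative sketch: CosineSimilarityLoss fine-tuning mirroring the reported
# hyperparameters (batch size 16, 50 epochs). Example pairs are placeholders.
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import CosineSimilarityLoss

model = SentenceTransformer("sentence-transformers/distilbert-base-nli-mean-tokens")

# Columns map positionally to the loss inputs; "label" is the similarity target.
train_dataset = Dataset.from_dict({
    "sentence_0": ["recipe text A", "recipe text B", "recipe text C"],
    "sentence_1": ["recipe text D", "recipe text E", "recipe text F"],
    "label": [0.1, 0.4, 0.3],
})

args = SentenceTransformerTrainingArguments(
    output_dir="recipe-distilbert-finetuned",  # placeholder path
    num_train_epochs=50,
    per_device_train_batch_size=16,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=CosineSimilarityLoss(model),
)
trainer.train()
```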

### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `num_train_epochs`: 50
- `multi_dataset_batch_sampler`: round_robin

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1
- `num_train_epochs`: 50
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: round_robin

</details>

### Training Logs
| Epoch   | Step | Training Loss |
|:-------:|:----:|:-------------:|
| 4.5455  | 500  | 0.0594        |
| 9.0909  | 1000 | 0.0099        |
| 13.6364 | 1500 | 0.0085        |
| 18.1818 | 2000 | 0.0077        |
| 22.7273 | 2500 | 0.0074        |
| 27.2727 | 3000 | 0.0071        |
| 31.8182 | 3500 | 0.0068        |
| 36.3636 | 4000 | 0.0066        |
| 40.9091 | 4500 | 0.0063        |
| 45.4545 | 5000 | 0.006         |
| 50.0    | 5500 | 0.0057        |

### Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.0.1
- Transformers: 4.41.2
- PyTorch: 2.3.0+cu121
- Accelerate: 0.31.0
- Datasets: 2.19.2
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json ADDED
@@ -0,0 +1,24 @@
{
  "_name_or_path": "sentence-transformers/distilbert-base-nli-mean-tokens",
  "activation": "gelu",
  "architectures": [
    "DistilBertModel"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "vocab_size": 30522
}
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.0.1",
    "transformers": "4.41.2",
    "pytorch": "2.3.0+cu121"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": null
}
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:04c064513ad8cdbd52ad90123b6a208f7f97365b88f0d4bb194516a42a927fcc
size 265462608
modules.json ADDED
@@ -0,0 +1,14 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 128,
  "do_lower_case": false
}
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "model_max_length": 128,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "DistilBertTokenizer",
  "unk_token": "[UNK]"
}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff