{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "e3000a69",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at human-centered-summarization/financial-summarization-pegasus and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "from transformers import PegasusTokenizer, PegasusForConditionalGeneration, TFPegasusForConditionalGeneration\n",
    "from rouge import Rouge\n",
    "\n",
    "# Load the Pegasus checkpoint fine-tuned for financial summarization, plus its tokenizer.\n",
    "# local_files_only=True resolves both from the local Hugging Face cache without any\n",
    "# network access, so the checkpoint must already have been downloaded on this machine.\n",
    "model_name = \"human-centered-summarization/financial-summarization-pegasus\"\n",
    "tokenizer = PegasusTokenizer.from_pretrained(model_name, local_files_only=True)\n",
    "model = PegasusForConditionalGeneration.from_pretrained(model_name, local_files_only=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6832cc0c",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Article that is summarized below.\n",
    "# NOTE(review): the ROUGE scores in this cell compare each generated summary against this\n",
    "# full article, not against a human-written gold summary -- confirm that this is intended.\n",
    "reference = \"National Commercial Bank (NCB), Saudi Arabia’s largest lender by assets, agreed to buy rival Samba Financial Group for $15 billion in the biggest banking takeover this year.NCB will pay 28.45 riyals ($7.58) for each Samba share, according to a statement on Sunday, valuing it at about 55.7 billion riyals. NCB will offer 0.739 new shares for each Samba share, at the lower end of the 0.736-0.787 ratio the banks set when they signed an initial framework agreement in June.The offer is a 3.5% premium to Samba’s Oct. 8 closing price of 27.50 riyals and about 24% higher than the level the shares traded at before the talks were made public. Bloomberg News first reported the merger discussions.The new bank will have total assets of more than $220 billion, creating the Gulf region’s third-largest lender. The entity’s $46 billion market capitalization nearly matches that of Qatar National Bank QPSC, which is still the Middle East’s biggest lender with about $268 billion of assets.\"\n",
    "\n",
    "# Loop-invariant setup: the tokenized article and the scorer do not depend on the\n",
    "# generation settings, so build them once instead of on every grid iteration.\n",
    "input_ids = tokenizer(reference, return_tensors=\"pt\").input_ids\n",
    "rouge_scorer = Rouge()\n",
    "\n",
    "# Grid search over beam width (outer loop) and maximum summary length (inner loop).\n",
    "for num_beams in [2, 5, 8, 12, 20]:\n",
    "    for max_length in [8, 32, 64, 128, 256]:\n",
    "        print(num_beams, max_length)\n",
    "\n",
    "        # Beam search. The beam width has to come from the loop variable; a hard-coded\n",
    "        # value here makes every num_beams setting produce the same summary and scores.\n",
    "        output = model.generate(\n",
    "            input_ids,\n",
    "            max_length=max_length,\n",
    "            num_beams=num_beams,\n",
    "            early_stopping=True\n",
    "        )\n",
    "\n",
    "        summary = tokenizer.decode(output[0], skip_special_tokens=True)\n",
    "        scores = rouge_scorer.get_scores(summary, reference)\n",
    "        # Print the F-score of every ROUGE metric reported for this summary, one per line.\n",
    "        for score in scores[-1].values():\n",
    "            print(score['f'])\n",
    "        print('-' * 70)\n",
    "    # Blank line between beam-width groups.\n",
    "    print()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}