diff --git "a/research/05_data_gpt.ipynb" "b/research/05_data_gpt.ipynb" new file mode 100644--- /dev/null +++ "b/research/05_data_gpt.ipynb" @@ -0,0 +1,2468 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os; os.chdir('..')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gpt3_textwiki_bio_textgpt3_sentencesannotationwiki_bio_test_idxgpt3_text_samples
0John Russell Reynolds (1820–1876) was an Engli...Sir John Russell Reynolds, 1st Baronet (22 May...['John Russell Reynolds (1820–1876) was an Eng...['major_inaccurate' 'major_inaccurate' 'major_...62464['John Russell Reynolds (1 November 1829 – 11...
1Matthew Aylmer, 1st Baron Aylmer (1708–1794) w...Admiral of the Fleet Matthew Aylmer, 1st Baron...['Matthew Aylmer, 1st Baron Aylmer (1708–1794)...['minor_inaccurate' 'minor_inaccurate' 'minor_...49661['\"Matthew Aylmer, 1st Baron Aylmer (c. 1650–1...
2Rick Mahler (born Richard Alan Mahler on April...Richard Keith Mahler (August 5, 1953 in Austin...['Rick Mahler (born Richard Alan Mahler on Apr...['minor_inaccurate' 'minor_inaccurate' 'accura...20483['Rick Mahler (January 8, 1956 – May 25, 2005)...
3James Blair (1732–1782) was an American lawyer...James Blair (September 26, 1786 - April 1, 183...['James Blair (1732–1782) was an American lawy...['minor_inaccurate' 'major_inaccurate' 'major_...71174['James Blair (April 2, 1755 – March 8, 1842) ...
4Tim Finchem (born August 24, 1947) is an Ameri...Timothy W. Finchem (born April 19, 1947) is th...['Tim Finchem (born August 24, 1947) is an Ame...['minor_inaccurate' 'accurate' 'major_inaccura...39945['\"Tim Finchem (born May 27, 1953) is an Ameri...
\n", + "
" + ], + "text/plain": [ + " gpt3_text \\\n", + "0 John Russell Reynolds (1820–1876) was an Engli... \n", + "1 Matthew Aylmer, 1st Baron Aylmer (1708–1794) w... \n", + "2 Rick Mahler (born Richard Alan Mahler on April... \n", + "3 James Blair (1732–1782) was an American lawyer... \n", + "4 Tim Finchem (born August 24, 1947) is an Ameri... \n", + "\n", + " wiki_bio_text \\\n", + "0 Sir John Russell Reynolds, 1st Baronet (22 May... \n", + "1 Admiral of the Fleet Matthew Aylmer, 1st Baron... \n", + "2 Richard Keith Mahler (August 5, 1953 in Austin... \n", + "3 James Blair (September 26, 1786 - April 1, 183... \n", + "4 Timothy W. Finchem (born April 19, 1947) is th... \n", + "\n", + " gpt3_sentences \\\n", + "0 ['John Russell Reynolds (1820–1876) was an Eng... \n", + "1 ['Matthew Aylmer, 1st Baron Aylmer (1708–1794)... \n", + "2 ['Rick Mahler (born Richard Alan Mahler on Apr... \n", + "3 ['James Blair (1732–1782) was an American lawy... \n", + "4 ['Tim Finchem (born August 24, 1947) is an Ame... \n", + "\n", + " annotation wiki_bio_test_idx \\\n", + "0 ['major_inaccurate' 'major_inaccurate' 'major_... 62464 \n", + "1 ['minor_inaccurate' 'minor_inaccurate' 'minor_... 49661 \n", + "2 ['minor_inaccurate' 'minor_inaccurate' 'accura... 20483 \n", + "3 ['minor_inaccurate' 'major_inaccurate' 'major_... 71174 \n", + "4 ['minor_inaccurate' 'accurate' 'major_inaccura... 39945 \n", + "\n", + " gpt3_text_samples \n", + "0 ['John Russell Reynolds (1 November 1829 – 11... \n", + "1 ['\"Matthew Aylmer, 1st Baron Aylmer (c. 1650–1... \n", + "2 ['Rick Mahler (January 8, 1956 – May 25, 2005)... \n", + "3 ['James Blair (April 2, 1755 – March 8, 1842) ... \n", + "4 ['\"Tim Finchem (born May 27, 1953) is an Ameri... " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df= pd.read_csv(\"data/original_data_gpt3.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
0Sir John Russell Reynolds, 1st Baronet (22 May...0
1Admiral of the Fleet Matthew Aylmer, 1st Baron...0
2Richard Keith Mahler (August 5, 1953 in Austin...0
3James Blair (September 26, 1786 - April 1, 183...0
4Timothy W. Finchem (born April 19, 1947) is th...0
.........
233Baba Gündüz Kılıç (1918-1980) was a Turkish fo...0
234Michael Replogle is an internationally recogni...0
235William John Burke (Polonized as Burkeauskas; ...0
236Ted Childs commenced training as a programme d...0
237Edward Synge (1659–1741) was an Anglican clerg...0
\n", + "

238 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "0 Sir John Russell Reynolds, 1st Baronet (22 May... 0\n", + "1 Admiral of the Fleet Matthew Aylmer, 1st Baron... 0\n", + "2 Richard Keith Mahler (August 5, 1953 in Austin... 0\n", + "3 James Blair (September 26, 1786 - April 1, 183... 0\n", + "4 Timothy W. Finchem (born April 19, 1947) is th... 0\n", + ".. ... ...\n", + "233 Baba Gündüz Kılıç (1918-1980) was a Turkish fo... 0\n", + "234 Michael Replogle is an internationally recogni... 0\n", + "235 William John Burke (Polonized as Burkeauskas; ... 0\n", + "236 Ted Childs commenced training as a programme d... 0\n", + "237 Edward Synge (1659–1741) was an Anglican clerg... 0\n", + "\n", + "[238 rows x 2 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df_human= pd.DataFrame({\n", + " \"Paragraph\": df.wiki_bio_text, \n", + " \"AI_generated\": pd.Series([0]*len(df))\n", + " \n", + " }\n", + ")\n", + "new_df_human" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
0John Russell Reynolds (1820–1876) was an Engli...1
1Matthew Aylmer, 1st Baron Aylmer (1708–1794) w...1
2Rick Mahler (born Richard Alan Mahler on April...1
3James Blair (1732–1782) was an American lawyer...1
4Tim Finchem (born August 24, 1947) is an Ameri...1
.........
233Gündüz Kılıç (born 28 April 1988) is a Turkish...1
234Michael Replogle (born 1951) is an American en...1
235Billy Burke (born October 28, 1894 – died Apri...1
236Ted Childs (born October 15, 1956) is an Ameri...1
237Edward Synge (1714–1798) was an Irish Anglican...1
\n", + "

238 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "0 John Russell Reynolds (1820–1876) was an Engli... 1\n", + "1 Matthew Aylmer, 1st Baron Aylmer (1708–1794) w... 1\n", + "2 Rick Mahler (born Richard Alan Mahler on April... 1\n", + "3 James Blair (1732–1782) was an American lawyer... 1\n", + "4 Tim Finchem (born August 24, 1947) is an Ameri... 1\n", + ".. ... ...\n", + "233 Gündüz Kılıç (born 28 April 1988) is a Turkish... 1\n", + "234 Michael Replogle (born 1951) is an American en... 1\n", + "235 Billy Burke (born October 28, 1894 – died Apri... 1\n", + "236 Ted Childs (born October 15, 1956) is an Ameri... 1\n", + "237 Edward Synge (1714–1798) was an Irish Anglican... 1\n", + "\n", + "[238 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df_ai= pd.DataFrame({\n", + " \"Paragraph\": df.gpt3_text, \n", + " \"AI_generated\": pd.Series([1]*len(df))\n", + " \n", + " }\n", + ")\n", + "new_df_ai" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"John Russell Reynolds (1820–1876) was an English lawyer, judge, and author. He was born in London, the son of a barrister, and was educated at Eton College and Trinity College, Cambridge. He was called to the bar in 1845, and became a Queen's Counsel in 1859. He was appointed a judge of the Court of Common Pleas in 1867, and was knighted in 1871.\\n\\nReynolds was a prolific author, writing on a wide range of topics. He wrote several books on legal topics, including The Law of Libel and Slander (1863), The Law of Copyright (1865), and The Law of Patents for Inventions (1868). He also wrote on a variety of other topics, including history, biography, and literature. He was a frequent contributor to the Saturday Review, and wrote several books on Shakespeare, including The Mystery of William Shakespeare (1848) and The Authorship of Shakespeare (1875). He also wrote a biography of the poet John Keats (1848).\"" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df_ai.Paragraph.values[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Sir John Russell Reynolds, 1st Baronet (22 May 1828 – 29 May 1896) was a British neurologist and physician. Reynolds was born in Romsey, Hampshire, as the son of John Reynolds, an independent minister, and the grandson of Dr. Henry Revell Reynolds. He received general education from his father, and was educated in his profession at University College, London, where he obtained three gold medals in the medical school. In 1851, he graduated M.B. in the University of London, and obtained a scholarship and gold medal in medicine. In 1852, he took the degree of M.D., and began practice in Leeds. He soon after moved to London, and took a house, 38 Grosvenor Street, in which Dr. Marshall Hall had lived. Hall announced to his patients in a printed circular that Reynolds had succeeded him in practice. Such procedure was contrary to a recognised understanding among physicians, and Hall incurred the censure of the College of Physicians. Reynolds, who was ignorant of Hall\\'s intention, was in no way responsible for the circular, and was in no way involved in the censure. He was duly elected a Fellow of the College in 1859. In the same year, he was appointed Assistant Physician to University College Hospital, to which he continued attached throughout life. He had before been, in 1855, Assistant Physician to the Hospital for Sick Children, and in 1857 Assistant Physician to the Westminster Hospital. In 1865, he became Professor of the Principles and Practice of Medicine at University College, and in 1878 he was appointed Physician-in-Ordinary to the Queen\\'s Household. He gained a considerable practice as a physician, and was often consulted in difficult cases of nervous disease. In 1869, he was elected a Fellow of the Royal Society, and in 1883 Vice-President of the Royal Medical and Chirurgical Society. He delivered the Lumleian Lecture at the College of Physicians in 1867, the Harveian Oration in 1884, and was elected President in 1893, on the death of Sir Andrew Clark. He was re-elected in 1894 and 1895, and on 1 January in the latter year was created a Baronet. In the winter of 1895–96, he became too ill to offer himself for re-election as President of the College of Physicians. He died at his house in Grosvenor Street, London, after several weeks of illness of a pulmonary congestion, on 29 May 1896. He was married, first, to Miss Ainslie, and, secondly, to Frances, widow of C. J. C. Crespigny, but left no children. The \"Reynolds Baronetcy\", of Grosvenor Street, was created in the Baronetage of the United Kingdom on 28 February 1895 for John Russell Reynolds. The title became extinct on his death. Reynolds devoted himself from an early period to the study of nervous diseases, and in 1854 published an \"Essay on Vertigo\"; in 1855 \"Diagnosis of Diseases of the Brain, Spinal Cord, and Nerves\", as well as \"Tables for the Diagnosis of Diseases of the Brain\"; in 1861 a treatise on epilepsy; in 1871 \"Lectures on the Clinical Uses of Electricity\"; in 1872 \"The Scientific Value of the Legal Tests of Insanity\"; besides many papers in medical periodicals and the transactions of medical societies, and several addresses to medical associations. His writings on nervous diseases were useful contributions to a department of medicine in which much work remained undone, but in the flood of modern observations they have been submerged. He will chiefly be remembered among physicians as the editor of the \"System of Medicine\", in five volumes, published from 1866 to 1879, a collection of essays on diseases, written by the most competent men who could be induced to write – compositions of varying merit, but generally of high value. He himself wrote the parts on erysipelas, on inflammation of the lymphatics, and on several forms of nervous disease. He published in 1893 a \"Life of Dr. Walter Hayle Walshe\". Reynolds was a tall man, with dark hair, with a dignified delivery and some oratorical power. Reynolds died in London.'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df_human.Paragraph.values[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
0John Russell Reynolds (1820–1876) was an Engli...1
1Matthew Aylmer, 1st Baron Aylmer (1708–1794) w...1
2Rick Mahler (born Richard Alan Mahler on April...1
3James Blair (1732–1782) was an American lawyer...1
4Tim Finchem (born August 24, 1947) is an Ameri...1
.........
233Baba Gündüz Kılıç (1918-1980) was a Turkish fo...0
234Michael Replogle is an internationally recogni...0
235William John Burke (Polonized as Burkeauskas; ...0
236Ted Childs commenced training as a programme d...0
237Edward Synge (1659–1741) was an Anglican clerg...0
\n", + "

476 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "0 John Russell Reynolds (1820–1876) was an Engli... 1\n", + "1 Matthew Aylmer, 1st Baron Aylmer (1708–1794) w... 1\n", + "2 Rick Mahler (born Richard Alan Mahler on April... 1\n", + "3 James Blair (1732–1782) was an American lawyer... 1\n", + "4 Tim Finchem (born August 24, 1947) is an Ameri... 1\n", + ".. ... ...\n", + "233 Baba Gündüz Kılıç (1918-1980) was a Turkish fo... 0\n", + "234 Michael Replogle is an internationally recogni... 0\n", + "235 William John Burke (Polonized as Burkeauskas; ... 0\n", + "236 Ted Childs commenced training as a programme d... 0\n", + "237 Edward Synge (1659–1741) was an Anglican clerg... 0\n", + "\n", + "[476 rows x 2 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "concat_df= pd.concat([new_df_ai, new_df_human])\n", + "concat_df" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "cdf_shffeled= concat_df.sample(frac=1).reset_index(drop=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "cdf_shffeled.to_csv(\"data/AI_checker_gpt3_remade.csv\", index=False)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training Model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from datasets import Dataset, load_dataset\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
337Arthur Sewall (November 25, 1835 – September 5...0
315Cicero Hunt Lewis (1825–1904) was an American ...1
75Michael Joseph \"Micky\" Moody (born 30 August 1...0
102Noel Anthony Hogan (born 25 December 1971) is ...1
305Don R. Swanson (October 10, 1924 - November 18...0
2Guðlaugur Þór Þórðarson (born 19 December 1967...0
390Sirið Stenberg (born 28 April 1988) is a Faroe...1
403Johan Christian Claussen Dahl (February 24, 17...0
370Richard Allen \"Rich\" Williams (born February 1...0
364Riccardo Tisci (born 1974 in Taranto, Italy) i...0
\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "337 Arthur Sewall (November 25, 1835 – September 5... 0\n", + "315 Cicero Hunt Lewis (1825–1904) was an American ... 1\n", + "75 Michael Joseph \"Micky\" Moody (born 30 August 1... 0\n", + "102 Noel Anthony Hogan (born 25 December 1971) is ... 1\n", + "305 Don R. Swanson (October 10, 1924 - November 18... 0\n", + "2 Guðlaugur Þór Þórðarson (born 19 December 1967... 0\n", + "390 Sirið Stenberg (born 28 April 1988) is a Faroe... 1\n", + "403 Johan Christian Claussen Dahl (February 24, 17... 0\n", + "370 Richard Allen \"Rich\" Williams (born February 1... 0\n", + "364 Riccardo Tisci (born 1974 in Taranto, Italy) i... 0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df= pd.read_csv('data/AI_checker_gpt3_remade.csv')\n", + "\n", + "\n", + "df.sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
98518Hisako Shimizu Hibi (1907–1991) was a Japanese...0
242695In 2009 Đặng Nhật Minh's film Don't Burn (Đừng...0
249411Sallie Wilson (1932, Fort Worth, Texas – Febru...1
269581Authentic leadership, while having no formal o...0
147495Katherine Phillips is an American educator and...1
296421Mardepodect (developmental code name PF-254592...0
272305A rehearsal report is a form created by an art...1
277337The Katoor Dynasty (also spelled Katur and Kat...1
209934Peninsular Spanish , also known as the Spanish...0
238498The softmax function, also known as softargmax...1
\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "98518 Hisako Shimizu Hibi (1907–1991) was a Japanese... 0\n", + "242695 In 2009 Đặng Nhật Minh's film Don't Burn (Đừng... 0\n", + "249411 Sallie Wilson (1932, Fort Worth, Texas – Febru... 1\n", + "269581 Authentic leadership, while having no formal o... 0\n", + "147495 Katherine Phillips is an American educator and... 1\n", + "296421 Mardepodect (developmental code name PF-254592... 0\n", + "272305 A rehearsal report is a form created by an art... 1\n", + "277337 The Katoor Dynasty (also spelled Katur and Kat... 1\n", + "209934 Peninsular Spanish , also known as the Spanish... 0\n", + "238498 The softmax function, also known as softargmax... 1" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2= pd.read_csv('data/AI_checker_remade.csv')\n", + "\n", + "\n", + "df2.sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
0Syd Rapson (born Sydney Rapson, 28 April 1932 ...1
1Bertram Deacon (8 November 1922 – 3 January 19...0
2Guðlaugur Þór Þórðarson (born 19 December 1967...0
3Achille-Ferdinand Carrier (February 15, 1859 -...0
4Rick Mahler (born Richard Alan Mahler on April...1
.........
299995James Wrighten (b. 1745 - d. 1793) was an Amer...1
299996The U-matrix (unified distance matrix) is a sy...1
299997Holding Trevor is a 2007 American gay-themed p...1
299998Jarvisfield is a heritage-listed former pastor...0
299999The Silver Guardian is a Chinese web series c...1
\n", + "

300476 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "0 Syd Rapson (born Sydney Rapson, 28 April 1932 ... 1\n", + "1 Bertram Deacon (8 November 1922 – 3 January 19... 0\n", + "2 Guðlaugur Þór Þórðarson (born 19 December 1967... 0\n", + "3 Achille-Ferdinand Carrier (February 15, 1859 -... 0\n", + "4 Rick Mahler (born Richard Alan Mahler on April... 1\n", + "... ... ...\n", + "299995 James Wrighten (b. 1745 - d. 1793) was an Amer... 1\n", + "299996 The U-matrix (unified distance matrix) is a sy... 1\n", + "299997 Holding Trevor is a 2007 American gay-themed p... 1\n", + "299998 Jarvisfield is a heritage-listed former pastor... 0\n", + "299999 The Silver Guardian is a Chinese web series c... 1\n", + "\n", + "[300476 rows x 2 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "concat_df= pd.concat([df, df2])\n", + "concat_df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df= concat_df.sample(frac=1).reset_index(drop=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textlabel
121493Ori Kritz (he: אורי קריץ) is an Israeli-born a...1
28946HMS Yarmouth was a 64-gun third rate ship of t...0
127419Ryan Taylor (born June 18, 1992) is an America...0
44654The 1962 Bluebonnet Bowl was an American colle...1
272756Dichorisandrinae is a subtribe within the trib...1
91933Phalaenopsis subparishii, also known as 短茎萼脊兰 ...1
214281Thomas Forsyth Torrance (30 August 1913 – 5 O...1
51312Yoann Maestri (; born 14 January 1988) is a Fr...0
131201Lonnie Shields (born April 17, 1956) is an Ame...1
61440John Shelton Fenty (born 10 December 1961) is ...1
\n", + "
" + ], + "text/plain": [ + " text label\n", + "121493 Ori Kritz (he: אורי קריץ) is an Israeli-born a... 1\n", + "28946 HMS Yarmouth was a 64-gun third rate ship of t... 0\n", + "127419 Ryan Taylor (born June 18, 1992) is an America... 0\n", + "44654 The 1962 Bluebonnet Bowl was an American colle... 1\n", + "272756 Dichorisandrinae is a subtribe within the trib... 1\n", + "91933 Phalaenopsis subparishii, also known as 短茎萼脊兰 ... 1\n", + "214281 Thomas Forsyth Torrance (30 August 1913 – 5 O... 1\n", + "51312 Yoann Maestri (; born 14 January 1988) is a Fr... 0\n", + "131201 Lonnie Shields (born April 17, 1956) is an Ame... 1\n", + "61440 John Shelton Fenty (born 10 December 1961) is ... 1" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(columns={\n", + " \"Paragraph\": \"text\", \n", + " \"AI_generated\": \"label\"\n", + "}, \n", + " inplace=True\n", + ")\n", + "\n", + "df.sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/pyarrow/pandas_compat.py:373: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if _pandas_api.is_sparse(col):\n" + ] + }, + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['text', 'label'],\n", + " num_rows: 300476\n", + "})" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset_df= Dataset.from_pandas(df)\n", + "dataset_df" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['text', 'label'],\n", + " num_rows: 240380\n", + " })\n", + " test: Dataset({\n", + " features: ['text', 'label'],\n", + " num_rows: 60096\n", + " })\n", + "})" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_data= dataset_df.train_test_split(test_size=0.2)\n", + "new_data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" + ] + } + ], + "source": [ + "from transformers import AutoTokenizer\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(\"models/trained_model_v11\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess_function(examples):\n", + " return tokenizer(examples[\"text\"], truncation=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Map: 100%|██████████| 240380/240380 [01:00<00:00, 3995.57 examples/s]\n", + "Map: 100%|██████████| 60096/60096 [00:15<00:00, 3983.79 examples/s]\n" + ] + } + ], + "source": [ + "tokenized_df = new_data.map(preprocess_function, batched=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-09 08:16:21.518621: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-10-09 08:16:22.221621: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + } + ], + "source": [ + "# from transformers import DataCollatorWithPadding\n", + "\n", + "# data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors=\"tf\")\n", + "\n", + "\n", + "\n", + "\n", + "from transformers import DataCollatorWithPadding\n", + "\n", + "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "import evaluate\n", + "\n", + "accuracy = evaluate.load(\"accuracy\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "\n", + "def compute_metrics(eval_pred):\n", + " predictions, labels = eval_pred\n", + " predictions = np.argmax(predictions, axis=1)\n", + " return accuracy.compute(predictions=predictions, references=labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "id2label = {0: \"NEGATIVE\", 1: \"POSITIVE\"}\n", + "label2id = {\"NEGATIVE\": 0, \"POSITIVE\": 1}" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n", + "\n", + "model = AutoModelForSequenceClassification.from_pretrained(\n", + " \"models/trained_model_v11\", num_labels=2, id2label=id2label, label2id=label2id\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [24978/30048 1:08:42 < 13:56, 6.06 it/s, Epoch 1.66/2]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation LossAccuracy
10.0061000.0213620.995540

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "training_args = TrainingArguments(\n", + " output_dir=\"gpt3_finetuned_model\",\n", + " learning_rate=2e-5,\n", + " per_device_train_batch_size=16,\n", + " per_device_eval_batch_size=16,\n", + " num_train_epochs=2,\n", + " weight_decay=0.01,\n", + " evaluation_strategy=\"epoch\",\n", + " save_strategy=\"epoch\",\n", + " load_best_model_at_end=True,\n", + " # push_to_hub=True,\n", + ")\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " train_dataset=tokenized_df[\"train\"],\n", + " eval_dataset=tokenized_df[\"test\"],\n", + " tokenizer=tokenizer,\n", + " data_collator=data_collator,\n", + " compute_metrics=compute_metrics,\n", + ")\n", + "\n", + "trainer.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/ubuntu/SentenceStructureComparision'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os; os.chdir('..')\n", + "%pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" + ] + } + ], + "source": [ + "from transformers import pipeline\n", + "\n", + "classifier = pipeline(\"text-classification\", model=\"gpt3_finetuned_model/checkpoint-480\", device=\"cuda\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "AutoTokenizer.__init__() takes 1 positional argument but 2 were given", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/05_data_gpt.ipynb Cell 28\u001b[0m line \u001b[0;36m3\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m AutoModelForSequenceClassification, AutoTokenizer\n\u001b[1;32m 2\u001b[0m model_name\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mgpt3_finetuned_model/checkpoint-480\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m AutoTokenizer(model_name)\n", + "\u001b[0;31mTypeError\u001b[0m: AutoTokenizer.__init__() takes 1 positional argument but 2 were given" + ] + } + ], + "source": [ + "# from transformers import AutoModelForSequenceClassification, AutoTokenizer\n", + "# model_name= \"gpt3_finetuned_model/checkpoint-480\"\n", + "# AutoTokenizer.from_pretrained(model_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 1.0}]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier('''Water intoxication, also known as water poisoning, hyperhydration, overhydration, or water toxemia, is a potentially fatal disturbance in brain functions that results when the normal balance of electrolytes in the body is pushed outside safe limits by excessive water intake.\n", + "\n", + "Under normal circumstances, accidentally consuming too much water is exceptionally rare. Nearly all deaths related to water intoxication in normal individuals have resulted either from water-drinking contests, in which individuals attempt to consume large amounts of water, or from long bouts of exercise during which excessive amounts of fluid were consumed.[1] In addition, water cure, a method of torture in which the victim is forced to consume excessive amounts of water, can cause water intoxication.[1]\n", + "\n", + "Water, like any other substance, can be considered a poison when over-consumed in a brief period of time. Water intoxication mostly occurs when water is being consumed in a high quantity without adequate electrolyte intake.[2]\n", + "\n", + "Excess of body water may also be a result of a medical condition or improper treatment; see \"hyponatremia\" for some examples. Water is considered one of the least toxic chemical compounds, with an LD50 exceeding 90 ml/kg in rats;[3] drinking six liters in three hours has caused the death of a human.[4]'''\n", + "\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 1.0}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1= '''Basketball is a team sport played by two teams of five players each. The primary objective is to score points by shooting the basketball through the opponent's hoop, which is mounted on a backboard 10 feet (3.048 meters) above the ground. The team with the most points at the end of the game wins. Basketball is played on a rectangular court, typically indoors, with a surface made of wood or synthetic materials. The rules and regulations are governed by various organizations, such as FIBA (International Basketball Federation) and the NBA (National Basketball Association). The following is a general outline of the basic rules of basketball:\n", + "\n", + "1. Game duration: A regulation basketball game is divided into four quarters, each lasting 12 minutes in the NBA and 10 minutes in FIBA play. College basketball in the US has two 20-minute halves. If the game is tied at the end of regulation, overtime periods are played until a winner is determined.\n", + "\n", + "2. Starting play: The game begins with a jump ball at the center of the court, where the referee throws the ball into the air, and one player from each team tries to gain possession by tapping it to a teammate.\n", + "\n", + "3. Scoring: Points are scored by shooting the ball through the hoop. A field goal made from inside the three-point arc is worth two points, while a field goal made from outside the arc is worth three points. Free throws, awarded after a foul, are worth one point each.\n", + "\n", + "4. Possession and dribbling: A player in possession of the ball must either pass it to a teammate or dribble (bounce) the ball while moving.'''\n", + "\n", + "\n", + "classifier(s1)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
0Syd Rapson (born Sydney Rapson, 28 April 1932 ...1
1Bertram Deacon (8 November 1922 – 3 January 19...0
2Guðlaugur Þór Þórðarson (born 19 December 1967...0
3Achille-Ferdinand Carrier (February 15, 1859 -...0
4Rick Mahler (born Richard Alan Mahler on April...1
\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "0 Syd Rapson (born Sydney Rapson, 28 April 1932 ... 1\n", + "1 Bertram Deacon (8 November 1922 – 3 January 19... 0\n", + "2 Guðlaugur Þór Þórðarson (born 19 December 1967... 0\n", + "3 Achille-Ferdinand Carrier (February 15, 1859 -... 0\n", + "4 Rick Mahler (born Richard Alan Mahler on April... 1" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "df= pd.read_csv(\"data/AI_checker_gpt3_remade.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([['Syd Rapson (born Sydney Rapson, 28 April 1932 – 28 April 2017) was an English Labour Party politician who served as Member of Parliament (MP) for Portsmouth South from 1997 to 2010.\\n\\nRapson was born in Portsmouth and educated at Portsmouth Grammar School and the University of Southampton. He worked as a teacher and a lecturer before entering politics. He was a councillor on Portsmouth City Council from 1973 to 1997, and was leader of the council from 1983 to 1997.\\n\\nRapson was elected to the House of Commons at the 1997 general election, and held the seat until he stood down at the 2010 general election. He was a member of the Defence Select Committee from 1997 to 2010, and was a member of the Parliamentary Labour Party Defence Committee from 1997 to 2005. He was also a member of the Parliamentary Labour Party Home Affairs Committee from 2005 to 2010.',\n", + " 1],\n", + " [\"Rick Mahler (born Richard Alan Mahler on April 18, 1957 in Atlanta, Georgia) was an American professional baseball pitcher who played for the Atlanta Braves, Cincinnati Reds, Montreal Expos, and Minnesota Twins of Major League Baseball (MLB) from 1978 to 1990. He was a two-time All-Star and won the National League (NL) ERA title in 1985.\\n\\nMahler was drafted by the Braves in the first round of the 1975 amateur draft. He made his major league debut with the Braves in 1978, and was a member of the team's starting rotation for the next five seasons. He was an All-Star in 1983 and 1985, and won the NL ERA title in 1985 with a 2.87 ERA. He was traded to the Reds in 1984, and then to the Expos in 1986. He was traded to the Twins in 1989, and finished his career with them in 1990.\\n\\nMahler was known for his durability, as he pitched over 200 innings in six consecutive seasons from 1983 to 1988. He finished his career with a record of 119–121, a 3.90 ERA, and 1,07\",\n", + " 1],\n", + " [\"Frederick Thomas Brentnall (1862–1937) was an English architect and surveyor. He was born in 1862 in Birmingham, England, and was educated at King Edward's School, Birmingham. He was articled to the Birmingham architect, J.H. Chamberlain, and then worked in the offices of William Bidlake and J.H. Chamberlain. In 1887 he set up his own practice in Birmingham.\\n\\nBrentnall was a prolific architect and surveyor, designing a wide range of buildings including churches, schools, factories, warehouses, offices, shops, and private residences. He was particularly noted for his work in the Arts and Crafts style, and was a member of the Birmingham Artisans' Guild. He was also a member of the Birmingham Architectural Association, and was a Fellow of the Royal Institute of British Architects. He was a keen advocate of the use of reinforced concrete in construction, and was a pioneer in the use of this material in the Midlands. He was also a keen advocate of the use of natural materials such as brick and stone, and was a pioneer in the use of\",\n", + " 1],\n", + " ['Alan Dinehart (April 17, 1888 – October 28, 1944) was an American actor of the silent and early sound film eras. He appeared in more than 200 films between 1915 and 1944, often playing suave, debonair leading men.\\n\\nDinehart was born in New York City and began his career on the stage. He made his film debut in 1915 and quickly became a popular leading man in silent films. He was often cast as a romantic lead opposite stars such as Mary Pickford, Lillian Gish, and Clara Bow. He also appeared in a number of westerns, including The Covered Wagon (1923) and The Iron Horse (1924).\\n\\nIn the early 1930s, Dinehart transitioned to sound films, appearing in a number of popular films such as The Big Broadcast (1932), The Invisible Man (1933), and The Little Minister (1934). He continued to appear in films until his death in 1944.',\n", + " 1],\n", + " [\"Freddie Frith (1917–1994) was an English motorcycle racer who competed in the Isle of Man TT races and other international events. He was born in London and began racing in the 1930s. He won the Isle of Man TT in 1938 and 1939, and was the first rider to win the Senior TT twice in succession. He also won the North West 200 in 1938 and 1939, and the Ulster Grand Prix in 1938 and 1939.\\n\\nFrith was a popular figure in the racing world, and was known for his daring and aggressive riding style. He was also known for his willingness to help other riders, and was often seen helping to push other riders' bikes to the start line. He retired from racing in 1947, but continued to be involved in the sport, working as a mechanic and team manager. He died in 1994, aged 77.\",\n", + " 1]], dtype=object)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.AI_generated==1].values[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier('''Political stability is the ability of a government to maintain order and authority within its borders. It is essential for economic growth, as it provides a foundation for investment and trade.\n", + "There are many factors that contribute to political stability, including:\n", + "A strong rule of law: The rule of law is the principle that everyone is subject to the same laws, regardless of their social status or political affiliation. A strong rule of law helps to prevent corruption and ensures that everyone has equal opportunity to succeed.\n", + "A well-functioning government: A well-functioning government is one that is able to provide essential services, such as security, education, and healthcare. It is also able to manage the economy effectively and to respond to crises.\n", + "A vibrant civil society: A vibrant civil society is one that is made up of active and engaged citizens. Civil society organizations can help to hold the government accountable and to promote democracy and good governance.\n", + "Political stability is not always easy to achieve, but it is essential for economic growth. By investing in political stability, we can create a foundation for long-term prosperity.\n", + "Here are some of the benefits of political stability:''')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# AI generated\n", + "classifier(\"To write a compelling sales copy, focus on understanding your target audience and their pain points. Craft a captivating headline to grab attention, followed by a concise and persuasive introduction. Highlight the benefits and unique selling points of your product or service, using persuasive language and storytelling techniques. Create a sense of urgency and offer a clear call-to-action to drive conversions. Revise and refine your copy for clarity and impact.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Human -> Undetectable AI\n", + "classifier(\n", + " \"Creating a great sales copy starts with identifying your target audience and their pain points. Create headlines that will tell the reader enough to reach a conclusion but make them want to know more. Then write a brief and effective introduction paragraph. Let the readers know how your products/services can help them using persuasive language and storytelling techniques. Make your reader feel a bit urgent when reading your copy and add a call-to-action to encourage them to take action. Revise and refine your copy for clarity and impact.\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# AI\n", + "\n", + "classifier(\"Writing a compelling product description involves understanding your audience, highlighting key features, and emphasizing benefits. Use clear, concise language and incorporate persuasive techniques. Provide accurate information, address potential concerns, and use SEO-friendly keywords. Remember, your goal is to convince customers that your product will improve their lives.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 0.9999998807907104}]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Human -> Undetectable AI\n", + "\n", + "classifier(\n", + " \"When crafting a product description that captivates your audience, it's important to understand them, showcase what your product is capable of, and emphasize how it is helpful. Utilize clear and concise language while incorporating persuasion techniques. Ensure accuracy in providing information and address any potential concerns they may have Additionally, incorporating SEO-friendly keywords will help maximize visibility. Always keep in mind that the ultimate goal is to convince customers that your product will enhance their lives.\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# AI written\n", + "\n", + "classifier(\"A business strategy is an outline of the actions and decisions a company plans to take to reach its goals and objectives. It defines what the company needs to do to reach its goals, which can help guide the decision-making process for hiring as well as resource allocation. A good business strategy is essential for any company that wants to be successful in the long run.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Human -> Undetectable AI\n", + "\n", + "classifier(\"A business strategy outlines the actions and decisions a company will take to achieve its goals and objectives. It provides direction for hiring and resource allocation, helping guide the decision-making process. A strong business strategy is crucial for long-term success.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# AI\n", + "classifier(\n", + " \"Autumn, the season of transformation, sweeps in with a burst of vibrant colors, transforming the world around us. Leaves turn from their ordinary green into fiery shades of red, orange, and yellow. Nature becomes an artist, creating landscapes that seem straight out of a dream. The cool air, crisp and invigorating, carries with it the scent of fallen leaves, promising cozy evenings by the fireplace. It's a time for apple picking, pumpkin carving, and leisurely walks through rustling foliage. The magic of autumn lies in its ability to remind us that change can be breathtakingly beautiful, even as we bid farewell to the warmth of summer. So, embrace this season of transition and find solace in the symphony of colors and the gentle rustle of fallen leaves underfoot.\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# human\n", + "classifier(\n", + " '''Autumn is the season for all senses. The feel of cooler temperatures after a long summer. A warm, visually pleasing palate of reds, oranges and browns. The taste of pumpkin spice in everything. The sound of crunching leaves underfoot. The smell of woodsmoke.\n", + "\n", + "The many elements of autumn either intrinsically deliver happiness or trigger memories of past joy from which we can keep taking bites, as from a freshly baked apple pie.'''\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# blog\n", + "classifier(\n", + " '''Like spring, fall is a season of transition, a reminder of the value of change, in this case from bright, buzzing, verdant summer toward the dark, quiet calm of winter. It’s a journey inward; first experiential, then intellectual and finally into the collective unconscious.'''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# wikipedia \n", + "classifier(\n", + " '''Autumn, also known as fall in North American English,[1] is one of the four temperate seasons on Earth. Outside the tropics, autumn marks the transition from summer to winter, in September (Northern Hemisphere) or March (Southern Hemisphere). Autumn is the season when the duration of daylight becomes noticeably shorter and the temperature cools considerably. Day length decreases and night length increases as the season progresses until the Winter Solstice in December (Northern Hemisphere) and June (Southern Hemisphere). One of its main features in temperate climates is the striking change in colour for the leaves of deciduous trees as they prepare to shed.'''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[\"Bertram Deacon (8 November 1922 – 3 January 1974) was an Australian rules footballer who played for Carlton in the Victorian Football League (VFL). He is remembered for being Carlton's first ever Brownlow Medal winner. A centre half back, Deacon began his league career in 1942. He was a premiership player for Carlton in 1945 (after acting as captain-coach of an army stores team that won all ten games in a services competition in Darwin) and 1947, the latter in a year which he won both a Brownlow Medal and shared Carlton's Best and Fairest Medal with his captain, Ern Henfry. The previous season Deacon had finished fourth in the Brownlow Medal count but this time around finished on top, thus becoming Carlton's inaugural winner of the award. Deacon left Carlton after the 1951 season and became captain-coach for Preston in the Victorian Football Association (VFA), the club where he had played for prior to being recruited by Carlton. He later served as Carlton vice-president and club secretary. He is the centre half back in Carlton's official 'Team of the Century'. Off the field, Deacon worked for many years for long-serving VFL and Carlton president Sir Kenneth Luke. Bertram Deacon died of a heart attack on 3 January 1974 while on holiday at Balnarring. Deacon was just 51 years of age and died despite the desperate efforts of his Preston team-mate, Pat Foley, to revive him.\",\n", + " 0],\n", + " [\"Guðlaugur Þór Þórðarson (born 19 December 1967) is an Icelandic politician. He has been a member of the Althing (Iceland's parliament) for the Independence Party since 2003 and served as the Minister of Health from 24 May 2007 to 1 February 2009. He is the first MP of the Reykjavík North constituency, after beating the Minister of Justice and Ecclesiastical Affairs, Björn Bjarnason, in the primary election. Guðlaugur graduated with a BA degree in political sciences from the University of Iceland in 1996. He is married to Ágústa Johnson, and is the father of two children. Guðlaugur started his involvement in the Independence Party in 1987 when he was elected to the board of the National Youth Organisation of the Independence Party, in which he served until 1997; of which he was Vice Chairman from 1989 to 1993 and Chairman from 1993 to 1997. He was then elected to the Reykjavík City Council in 1998 and sat two four-year terms there. He sat on the Parliamentary Committee on Welfare Issues from 2003 to 2006, the Committee on Fisheries from 2003 to 2007, and the Committee on the Environment from 2003 to 2007 (Chairman from 2004 to 2007).\",\n", + " 0],\n", + " ['Achille-Ferdinand Carrier (February 15, 1859 - March 21, 1930) was a lawyer, judge, and political figure in Quebec. He represented Gaspé in the Legislative Assembly of Quebec from 1890 to 1892 as a Liberal. He was born in Saint-Roch, Canada East, the son of Ferdinand Carrier and Mary Ann Donahue, and was educated at the Séminaire de Québec and the Université Laval. Carrier was called to the Quebec bar in 1882. He was also a member of the Minnesota bar and practised in Minneapolis in 1885 and 1886. Carrier was the editor of the \"L\\'Écho de l\\'Ouest\", a French-Canadian newspaper in Minneapolis. He ran unsuccessfully for a seat in the Canadian House of Commons in 1887. Carrier was defeated when he ran for reelection in 1892; he also ran unsuccessfully for the Terrebonne seat in 1897. He served as judge in the Magistrate\\'s Court for Terrebonne, Joliette and Ottawa districts from 1898 to 1924. Carrier died in Quebec City at the age of 71. He was the uncle of Oscar Lefebvre Boulanger.',\n", + " 0],\n", + " ['John Howe (born August 21, 1957) is a Canadian book illustrator, living in Neuchâtel, Switzerland. One year after graduating from high school, he studied at a college in Strasbourg, France, then at the École des Arts Décoratifs. He is best known for his work based on J.R.R. Tolkien\\'s worlds. Howe and noted Tolkien artist Alan Lee served as chief conceptual designers for Peter Jackson\\'s \"The Lord of the Rings\" movie trilogy, and Howe also did the illustration for the \"Lord of the Rings\" board game created by Reiner Knizia. Howe also re-illustrated the maps of \"The Lord of the Rings\", \"The Hobbit\", and \"The Silmarillion\" from 1996 to 2003. His work is not limited to this, and includes images of myths such as the Anglo Saxon legend of Beowulf (he also illustrated Knizia\\'s board game). Howe has illustrated many other books, many of which belong to the fantasy genre (such as Robin Hobb\\'s books). He also contributed to the film adaptation of C.S. Lewis\\'s \"The Lion, the Witch, and the Wardrobe\". In 2005, a limited edition of George R.R. Martin\\'s novel \"A Clash of Kings\" was released by Meisha Merlin, complete with numerous illustrations by Howe. Howe has also illustrated cards for the \"Magic: The Gathering\" collectible card game. For the \"The Hobbit\" films, former director Guillermo del Toro and current director Peter Jackson have been in consultation with Howe and fellow conceptual artist Alan Lee to ensure continuity of design. Howe is a member of the living history group The Company of Saynt George.',\n", + " 0],\n", + " ['Lionel Aldridge (February 14, 1941 - February 12, 1998) was an American professional football player. Aldridge was an All-Skyline tackle and co-captain of the Utah State Aggies. He was drafted in 1963 after a standout college career at Utah State. One of the few rookies to start for coach Vince Lombardi, Aldridge enjoyed an eleven-year NFL career. As a Packer, he played a role in three straight NFL championships (1965-66-67) and in Packer victories in Super Bowls I and II. Traded to the San Diego Chargers, Aldridge played two seasons in San Diego before retiring from professional football in 1973. After retiring, Aldridge worked as a sports analyst in Milwaukee and for Packers radio and NBC until manifesting paranoid schizophrenia in the late 1970s. Homeless for a time in part due to misdiagnosis, he eventually reached a form of equilibrium. He became an advocate for the homeless and the mentally ill until his death in 1998. His advocacy work included serving as a board member for the Mental Health Association of Milwaukee and working as a speaker for the National Alliance on Mental Illness.',\n", + " 0]], dtype=object)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.AI_generated==0].values[:5]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "s= \"\"\"John Howe (born August 21, 1957) is a Canadian book illustrator, living in Neuchâtel, Switzerland. One year after graduating from high school, he studied at a college in Strasbourg, France, then at the École des Arts Décoratifs. He is best known for his work based on J.R.R. Tolkien\\'s worlds. Howe and noted Tolkien artist Alan Lee served as chief conceptual designers for Peter Jackson\\'s \"The Lord of the Rings\" movie trilogy, and Howe also did the illustration for the \"Lord of the Rings\" board game created by Reiner Knizia. Howe also re-illustrated the maps of \"The Lord of the Rings\", \"The Hobbit\", and \"The Silmarillion\" from 1996 to 2003. His work is not limited to this, and includes images of myths such as the Anglo Saxon legend of Beowulf (he also illustrated Knizia\\'s board game). Howe has illustrated many other books, many of which belong to the fantasy genre (such as Robin Hobb\\'s books). He also contributed to the film adaptation of C.S. Lewis\\'s \"The Lion, the Witch, and the Wardrobe\". In 2005, a limited edition of George R.R. Martin\\'s novel \"A Clash of Kings\" was released by Meisha Merlin, complete with numerous illustrations by Howe. Howe has also illustrated cards for the \"Magic: The Gathering\" collectible card game. For the \"The Hobbit\" films, former director Guillermo del Toro and current director Peter Jackson have been in consultation with Howe and fellow conceptual artist Alan Lee to ensure continuity of design. Howe is a member of the living history group The Company of Saynt George.\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 1.0}]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
0Syd Rapson (born Sydney Rapson, 28 April 1932 ...1
1Bertram Deacon (8 November 1922 – 3 January 19...0
2Guðlaugur Þór Þórðarson (born 19 December 1967...0
3Achille-Ferdinand Carrier (February 15, 1859 -...0
4Rick Mahler (born Richard Alan Mahler on April...1
\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "0 Syd Rapson (born Sydney Rapson, 28 April 1932 ... 1\n", + "1 Bertram Deacon (8 November 1922 – 3 January 19... 0\n", + "2 Guðlaugur Þór Þórðarson (born 19 December 1967... 0\n", + "3 Achille-Ferdinand Carrier (February 15, 1859 -... 0\n", + "4 Rick Mahler (born Richard Alan Mahler on April... 1" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# classifier(\"My name is Deepankar Sharma.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 1.0}]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier(\n", + " '''Some cultures regard the autumnal equinox as \"mid-autumn\", while others with a longer temperature lag treat the equinox as the start of autumn.[2] In the English-speaking world of high latitude countries, autumn traditionally began with Lammas Day and ended around Hallowe'en, the approximate mid-points between midsummer, the autumnal equinox, and midwinter. Meteorologists (and Australia[3][4] and most of the temperate countries in the southern hemisphere)[5][6] use a definition based on Gregorian calendar months, with autumn being September, October, and November in the northern hemisphere,[7] and March, April, and May in the southern hemisphere.\n", + "\n", + "In the higher latitude countries in the Northern Hemisphere, autumn traditionally starts with the September equinox (21 to 24 September)[8] and ends with the winter solstice (21 or 22 December).[9] Popular culture in the United States associates Labor Day, the first Monday in September, as the end of summer and the start of autumn; certain summer traditions, such as wearing white, are discouraged after that date.[10] As daytime and nighttime temperatures decrease, trees change colour and then shed their leaves.[11] Persians celebrate the beginning of the autumn on Mehregan.\n", + "\n", + "Under the traditional East Asian solar term system, autumn starts on or around 8 August and ends on or about 7 November. In Ireland, the autumn months according to the national meteorological service, Met Éireann, are September, October, and November.[12] However, according to the Irish Calendar, which is based on ancient Gaelic traditions, autumn lasts throughout the months of August, September, and October, or possibly a few days later, depending on tradition. In the Irish language, September is known as Meán Fómhair (\"middle of autumn\") and October as Deireadh Fómhair (\"end of autumn\").[13][14] Late Roman Republic scholar Marcus Terentius Varro defined autumn as lasting from the third day before the Ides of Sextilis (August 11) to the fifth day before the Ides of November (November 9).[15]'''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 1.0}]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier('''The word autumn (/ˈɔːtəm/) is derived from Latin autumnus, archaic auctumnus, possibly from the ancient Etruscan root autu- and has within it connotations of the passing of the year.[16] Alternative etymologies include Proto-Indo-European *h₃ewǵ- (\"cold\") or *h₂sows- (\"dry\").[17]\n", + "\n", + "After the Greek era, the word continued to be used as the Old French word autompne (automne in modern French) or autumpne in Middle English,[18] and was later normalised to the original Latin. In the Medieval period, there are rare examples of its use as early as the 12th century, but by the 16th century, it was in common use.\n", + "\n", + "Before the 16th century, harvest was the term usually used to refer to the season, as it is common in other West Germanic languages to this day (cf. Dutch herfst, German Herbst, and Scots hairst). However, as more people gradually moved from working the land to living in towns, the word harvest lost its reference to the time of year and came to refer only to the actual activity of reaping, and autumn, as well as fall, began to replace it as a reference to the season.[19][20][better source needed]\n", + "\n", + "The alternative word fall for the season traces its origins to old Germanic languages. The exact derivation is unclear, with the Old English fiæll or feallan and the Old Norse fall all being possible candidates. However, these words all have the meaning \"to fall from a height\" and are clearly derived either from a common root or from each other. The term came to denote the season in 16th-century England, a contraction of Middle English expressions like \"fall of the leaf\" and \"fall of the year\". Compare the origin of spring from \"spring of the leaf\" and \"spring of the year\".[21]\n", + "\n", + "During the 17th century, Englishmen began emigrating to the new North American colonies, and the settlers took the English language with them. While the term fall gradually became nearly obsolete in Britain, it became the more common term in North America.[22]\n", + "\n", + "The name backend, a once common name for the season in Northern England, has today been largely replaced by the name autumn.[23]''')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 1.0}]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier(\n", + " '''Indian summer, period of dry, unseasonably warm weather in late October or November in the central and eastern United States. The term originated in New England and probably arose from the Indians’ practice of gathering winter stores at this time. This autumn warm period also occurs in Europe, where in Britain it is called All-hallown summer or Old Wives’ summer. Indian summer may occur several times in some years and not at all in others; it often persists for a week or longer. The nights are cool and may bring frost, and the days have hazy skies and light winds. The lack of clouds causes the daytime hours to be quite pleasant as the air usually has a low relative humidity and the trees have their autumn foliage.\n", + "\n", + "In the United States, an Indian summer period occurs when a cool, shallow polar air mass stagnates and becomes a deep, warm high-pressure centre. This centre is characterized by a strong low-level temperature inversion that produces a stable air stratification. As a result, vertical air motions are inhibited, and smoke and dust are concentrated near the ground, which accounts for the haziness.'''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier(\"\"\"Autumn, with its crisp air and the earthy scent of fallen leaves, is a season of unparalleled beauty and transformation. As the days grow shorter and the sun's angle lowers in the sky, the once-green leaves of trees burst into a magnificent display of warm hues – fiery reds, vibrant oranges, and rich golds. It's a time when nature becomes an artist, painting landscapes with its vibrant palette. The gentle rustle of leaves underfoot and the taste of freshly picked apples evoke a sense of nostalgia and warmth. Autumn invites us to cozy up with a hot drink, watch the world change, and embrace the idea that even in the midst of change, there is a unique and serene beauty that can be found in letting go, just as the trees release their leaves to prepare for the coming winter.\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 0.9999998807907104}]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier(\"Autumn, that glorious season of transition, heralds its arrival with a symphony of sensory delights. The air turns crisper, carrying with it the earthy fragrance of fallen leaves, while the sun, with its lowered angle, casts a soft, golden glow upon the world. It's the time when nature dons its artist's robe, meticulously crafting landscapes adorned in a dazzling array of colors - from the fiery reds and burnt oranges to the deep, lustrous golds. As the leaves gently cascade to the ground, they form a vibrant carpet that beckons us to take leisurely walks and relish the satisfying crunch underfoot. There's a certain nostalgia that permeates the season, evoking memories of warm apple pies, pumpkin patches, and cozy fireside gatherings. Autumn serves as a reminder that change is inevitable, but it carries with it a unique beauty and a promise of renewal, just as the trees shed their leaves to prepare for the serene embrace of winter.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 1.0}]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier('''Autumn, that magnificent season of transformation, unfurls its tapestry of wonder with a graceful and undeniable presence. As the calendar turns, the very atmosphere seems to shift, bearing the crisp, clean scent of fallen leaves and the promise of cool, invigorating days. The sun, now casting its warm embrace at a lower angle, bathes the world in a soft, golden light that dances upon the landscape. Nature, in all its boundless artistry, takes center stage, meticulously painting the trees in a breathtaking spectrum of colors that defy mere description – from the fiery reds that blaze like an artist's passion, to the vibrant oranges that resonate with life, and the rich, gilded golds that evoke feelings of timeless elegance.\\n\\nThis is the season when the trees, as if in a grand act of surrender, let go of their leaves, allowing them to flutter gracefully to the ground. As they blanket the earth, they form a carpet of memories that beckon us to take unhurried strolls, delighting in the satisfying rustle and gentle crunch beneath our footsteps. Autumn carries with it a peculiar sense of nostalgia, invoking memories of orchard gatherings for apple-picking, the artistry of intricately carved pumpkins, and the cherished warmth of fireside gatherings. It serves as a poignant reminder that change, inevitable and relentless, possesses its own unique beauty – a beauty that lies in the impermanence of things, in the promise of renewal, just as the trees shed their leaves to prepare for the serene hush of winter's embrace.''')" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier(\n", + " '''Rick Mahler (born Richard Alan Mahler on April 18, 1957 in Atlanta, Georgia) was an American professional baseball pitcher who played for the Atlanta Braves, Cincinnati Reds, Montreal Expos, and Minnesota Twins of Major League Baseball (MLB) from 1978 to 1990. He was a two-time All-Star and won the National League (NL) ERA title in 1985.\\n\\nMahler was drafted by the Braves in the first round of the 1975 amateur draft. He made his major league debut with the Braves in 1978, and was a member of the team's starting rotation for the next five seasons. He was an All-Star in 1983 and 1985, and won the NL ERA title in 1985 with a 2.87 ERA. He was traded to the Reds in 1984, and then to the Expos in 1986. He was traded to the Twins in 1989, and finished his career with them in 1990.\\n\\nMahler was known for his durability, as he pitched over 200 innings in six consecutive seasons from 1983 to 1988. He finished his career with a record of 119–121, a 3.90 ERA, and 1,07\",\n", + " '''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 1.0}]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier(\n", + " '''Rick Mahler was a Major League Baseball (MLB) pitcher who played in the league for 11 seasons. He was born on August 5, 1953, in Austin, Texas, and he passed away on December 9, 2005. Mahler's MLB career began in 1979 when he made his debut with the Atlanta Braves. He was known for his time as a starting pitcher with the Braves, Cincinnati Reds, Montreal Expos, and Florida Marlins.\n", + "\n", + "During his career, Rick Mahler was known for his consistency and durability as a pitcher. He was not an overpowering pitcher but relied on control and finesse. His best season came in 1985 when he had a 17-15 record with the Atlanta Braves and was named an All-Star. He was a reliable innings-eater for several teams throughout his career.\n", + "\n", + "Rick Mahler retired from professional baseball in 1991. After his playing career, he continued to be involved in the game as a coach and instructor. He passed away in 2005 at the age of 52. While not one of the most widely recognized names in baseball, Mahler had a respectable career and made his mark on the sport during his time as a pitcher.'''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'NEGATIVE', 'score': 1.0}]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier(\"\"\"Autumn, often referred to as fall, is one of the four temperate seasons that follows summer and precedes winter. It typically begins in September (Northern Hemisphere) or March (Southern Hemisphere) and lasts until December (Northern Hemisphere) or June (Southern Hemisphere). Autumn is characterized by a gradual change in weather and the environment.\n", + "\n", + "One of the most striking features of autumn is the transformation of foliage. Deciduous trees shed their leaves, which transition from their usual green to a breathtaking array of colors, including reds, oranges, yellows, and browns. This phenomenon is often called \"fall foliage\" and attracts tourists and nature enthusiasts to witness the vibrant displays in various regions around the world.\n", + "\n", + "The season also brings cooler temperatures and shorter daylight hours as the Earth tilts away from the sun. This shift in sunlight triggers a cascade of natural events, including the maturation of many fruits and nuts, making it a fruitful time for harvesting. Apple picking, pumpkin carving, and the preparation of hearty, warming meals are cherished autumn traditions in many cultures.\n", + "\n", + "Autumn is often associated with a sense of change and introspection. The crisp air and the sound of leaves crunching underfoot create a unique sensory experience. It's a season that invites people to reflect on the passing of time and the beauty of impermanence.\n", + "\n", + "In many places, autumn festivals and holidays like Halloween and Thanksgiving are celebrated, bringing communities together for various activities and traditions. Overall, autumn is a season of both visual and sensory splendor, where the world transforms, and nature puts on a dazzling show before the arrival of winter's chill.\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ParagraphAI_generated
0Syd Rapson (born Sydney Rapson, 28 April 1932 ...1
1Bertram Deacon (8 November 1922 – 3 January 19...0
2Guðlaugur Þór Þórðarson (born 19 December 1967...0
3Achille-Ferdinand Carrier (February 15, 1859 -...0
4Rick Mahler (born Richard Alan Mahler on April...1
.........
471John Loder (1904–1972) was a British sound eng...1
472Paul Caddis (born 28 October 1988) is a Scotti...1
473Richard Michael \"Kip\" Carpenter (14 August 192...0
474Tadeusz Szeligowski (1896–1963) was a Polish c...1
475Iwakuma made his professional debut with the B...0
\n", + "

476 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Paragraph AI_generated\n", + "0 Syd Rapson (born Sydney Rapson, 28 April 1932 ... 1\n", + "1 Bertram Deacon (8 November 1922 – 3 January 19... 0\n", + "2 Guðlaugur Þór Þórðarson (born 19 December 1967... 0\n", + "3 Achille-Ferdinand Carrier (February 15, 1859 -... 0\n", + "4 Rick Mahler (born Richard Alan Mahler on April... 1\n", + ".. ... ...\n", + "471 John Loder (1904–1972) was a British sound eng... 1\n", + "472 Paul Caddis (born 28 October 1988) is a Scotti... 1\n", + "473 Richard Michael \"Kip\" Carpenter (14 August 192... 0\n", + "474 Tadeusz Szeligowski (1896–1963) was a Polish c... 1\n", + "475 Iwakuma made his professional debut with the B... 0\n", + "\n", + "[476 rows x 2 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'label': 'POSITIVE', 'score': 0.9975578784942627}]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classifier('''Paul Caddis is a Scottish professional footballer who primarily plays as a defender, specifically as a right-back or right-wing-back. He was born on April 19, 1988, in Irvine, Scotland. Caddis has had a career spanning multiple clubs in English and Scottish football.\n", + "\n", + "Caddis began his professional career with Scottish club Celtic in 2006, but he spent much of his early career on loan spells at other clubs to gain experience. In 2010, he joined Swindon Town on loan and played a crucial role in helping them achieve promotion from League Two to League One. His performances earned him a permanent move to Swindon Town in 2010.\n", + "\n", + "During his time at Swindon Town, Paul Caddis established himself as a dependable right-back and captain of the team. He also had a brief loan spell at Birmingham City in 2013, which eventually led to a permanent transfer in the same year. He became a regular starter for Birmingham City and continued to be a valuable member of the squad during his time there.\n", + "\n", + "Caddis later moved to Blackburn Rovers in 2016, where he spent a couple of seasons before joining Bradford City in 2018. He continued to ply his trade in the lower divisions of English football.\n", + "\n", + "Please note that player careers in professional sports can change over time, and my knowledge is based on information available up to September 2021. For the most up-to-date information on Paul Caddis's career, you may want to refer to more recent sources or the official website of his current club.''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}