{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'bs4'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[2], line 6\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpymongo\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MongoClient\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mrequests\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbs4\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BeautifulSoup\n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'bs4'" ] } ], "source": [ "import csv\n", "import pandas as pd \n", "from pymongo import MongoClient\n", "\n", "import requests\n", "from bs4 import BeautifulSoup\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Connect to MongoDB\n", "client = MongoClient(\"mongodb://localhost:27017/\")\n", "db = client[\"myDatabase\"]\n", "source_collection = db[\"data\"]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Export translated data to a CSV file (note: this data was downloaded as JSON instead)\n", "\"\"\"yeni_data = list(source_collection.find())\n", "print(yeni_data)\"\"\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _id title \\\n", "0 {'$oid': '66a1020f29abc84d21689044'} Mental Note Vol. 
24 \n", "1 {'$oid': '66a1020f29abc84d21689045'} Your Brain On Coronavirus \n", "2 {'$oid': '66a1020f29abc84d21689046'} Mind Your Nose \n", "3 {'$oid': '66a1020f29abc84d21689047'} The 4 Purposes of Dreams \n", "4 {'$oid': '66a1020f29abc84d21689048'} Surviving a Rod Through the Head \n", "\n", " url authors \\\n", "0 https://medium.com/invisible-illness/mental-no... ['Ryan Fan'] \n", "1 https://medium.com/age-of-awareness/how-the-pa... ['Simon Spichak'] \n", "2 https://medium.com/neodotlife/mind-your-nose-f... [] \n", "3 https://medium.com/science-for-real/the-4-purp... ['Eshan Samaranayake'] \n", "4 https://medium.com/live-your-life-on-purpose/s... ['Rishav Sinha'] \n", "\n", " timestamp \\\n", "0 2020-12-26 03:38:10.479000+00:00 \n", "1 2020-09-23 22:10:17.126000+00:00 \n", "2 2020-10-10 20:17:37.132000+00:00 \n", "3 2020-12-21 16:05:19.524000+00:00 \n", "4 2020-02-26 00:01:01.576000+00:00 \n", "\n", " tags \n", "0 ['Mental Health', 'Health', 'Psychology', 'Sci... \n", "1 ['Mental Health', 'Coronavirus', 'Science', 'P... \n", "2 ['Biotechnology', 'Neuroscience', 'Brain', 'We... \n", "3 ['Health', 'Neuroscience', 'Mental Health', 'P... \n", "4 ['Brain', 'Health', 'Development', 'Psychology... \n" ] } ], "source": [ "# Load the JSON export (myDatabase.data.json) into a DataFrame\n", "df=pd.read_json('myDatabase.data.json')\n", "print(df.head())" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | _id | \n", "title | \n", "url | \n", "authors | \n", "timestamp | \n", "tags | \n", "
---|---|---|---|---|---|---|
0 | \n", "{'$oid': '66a1020f29abc84d21689044'} | \n", "Mental Note Vol. 24 | \n", "https://medium.com/invisible-illness/mental-no... | \n", "['Ryan Fan'] | \n", "2020-12-26 03:38:10.479000+00:00 | \n", "['Mental Health', 'Health', 'Psychology', 'Sci... | \n", "
1 | \n", "{'$oid': '66a1020f29abc84d21689045'} | \n", "Your Brain On Coronavirus | \n", "https://medium.com/age-of-awareness/how-the-pa... | \n", "['Simon Spichak'] | \n", "2020-09-23 22:10:17.126000+00:00 | \n", "['Mental Health', 'Coronavirus', 'Science', 'P... | \n", "
2 | \n", "{'$oid': '66a1020f29abc84d21689046'} | \n", "Mind Your Nose | \n", "https://medium.com/neodotlife/mind-your-nose-f... | \n", "[] | \n", "2020-10-10 20:17:37.132000+00:00 | \n", "['Biotechnology', 'Neuroscience', 'Brain', 'We... | \n", "
3 | \n", "{'$oid': '66a1020f29abc84d21689047'} | \n", "The 4 Purposes of Dreams | \n", "https://medium.com/science-for-real/the-4-purp... | \n", "['Eshan Samaranayake'] | \n", "2020-12-21 16:05:19.524000+00:00 | \n", "['Health', 'Neuroscience', 'Mental Health', 'P... | \n", "
4 | \n", "{'$oid': '66a1020f29abc84d21689048'} | \n", "Surviving a Rod Through the Head | \n", "https://medium.com/live-your-life-on-purpose/s... | \n", "['Rishav Sinha'] | \n", "2020-02-26 00:01:01.576000+00:00 | \n", "['Brain', 'Health', 'Development', 'Psychology... | \n", "