GPT-Researcher / outputs / task_1738166363_Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry.json
{
"timestamp": "2025-01-29T21:29:23.249087",
"events": [
{
"timestamp": "2025-01-29T21:29:26.624644",
"type": "event",
"data": {
"type": "logs",
"content": "starting_research",
"output": "\ud83d\udd0d Starting the research task for 'Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. '...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:26.628740",
"type": "event",
"data": {
"type": "logs",
"content": "agent_generated",
"output": "\ud83e\udd16 AI Research Agent",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:26.645849",
"type": "event",
"data": {
"type": "logs",
"content": "planning_research",
"output": "\ud83c\udf10 Browsing the web to learn more about the task: Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. ...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:31.653913",
"type": "event",
"data": {
"type": "logs",
"content": "planning_research",
"output": "\ud83e\udd14 Planning the research strategy and subtasks...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:35.511464",
"type": "event",
"data": {
"type": "logs",
"content": "subqueries",
"output": "\ud83d\uddc2\ufe0f I will conduct my research based on the following queries: ['\"Deepseek v3 R1\" model architecture and performance benchmarks', 'Impact of \"Deepseek v3 R1\" on AI industry competition and market share', '\"Deepseek v3 R1\" open-source code and community contributions analysis', '\"Deepseek v3 R1\" training costs and resource efficiency compared to competitors', 'Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. ']...",
"metadata": [
"\"Deepseek v3 R1\" model architecture and performance benchmarks",
"Impact of \"Deepseek v3 R1\" on AI industry competition and market share",
"\"Deepseek v3 R1\" open-source code and community contributions analysis",
"\"Deepseek v3 R1\" training costs and resource efficiency compared to competitors",
"Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. "
]
}
},
{
"timestamp": "2025-01-29T21:29:35.557976",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for '\"Deepseek v3 R1\" model architecture and performance benchmarks'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:35.600410",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for 'Impact of \"Deepseek v3 R1\" on AI industry competition and market share'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:35.617500",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for '\"Deepseek v3 R1\" open-source code and community contributions analysis'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:35.625880",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for '\"Deepseek v3 R1\" training costs and resource efficiency compared to competitors'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:35.656574",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for 'Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. '...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:39.196567",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://pub.towardsai.net/the-deepseek-revolution-why-this-ai-model-is-outperforming-tech-giants-in-85-of-enterprise-tasks-8fa3fd1284a2\n",
"metadata": "https://pub.towardsai.net/the-deepseek-revolution-why-this-ai-model-is-outperforming-tech-giants-in-85-of-enterprise-tasks-8fa3fd1284a2"
}
},
{
"timestamp": "2025-01-29T21:29:39.210874",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\n",
"metadata": "https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/"
}
},
{
"timestamp": "2025-01-29T21:29:39.223058",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://bdtechtalks.com/2025/01/29/deepseek-r1-winners-losers/\n",
"metadata": "https://bdtechtalks.com/2025/01/29/deepseek-r1-winners-losers/"
}
},
{
"timestamp": "2025-01-29T21:29:39.234880",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.nytimes.com/2025/01/28/technology/china-deepseek-ai-silicon-valley.html\n",
"metadata": "https://www.nytimes.com/2025/01/28/technology/china-deepseek-ai-silicon-valley.html"
}
},
{
"timestamp": "2025-01-29T21:29:39.250086",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.reuters.com/technology/artificial-intelligence/what-is-deepseek-why-is-it-disrupting-ai-sector-2025-01-27/\n",
"metadata": "https://www.reuters.com/technology/artificial-intelligence/what-is-deepseek-why-is-it-disrupting-ai-sector-2025-01-27/"
}
},
{
"timestamp": "2025-01-29T21:29:39.265417",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:39.281159",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 5 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:40.533380",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 3 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:40.547726",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 3 new images from 3 total images",
"metadata": [
"https://felloai.com/wp-content/uploads/2025/01/All-About-DeepSeek-Company-and-their-revolutionary-R1-and-V3-models-that-are-disruption-AI-Industry.jpg",
"https://felloai.com/wp-content/uploads/2025/01/deepseek-officially-tops-the-appstore-v0-eb8nxvvptdfe1.jpeg-831x1024.webp",
"https://felloai.com/wp-content/uploads/2025/01/Screenshot-2025-01-27-at-11.28.00-1-1024x387.png"
]
}
},
{
"timestamp": "2025-01-29T21:29:40.569126",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:40.586432",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. ...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:40.892145",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/\n",
"metadata": "https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/"
}
},
{
"timestamp": "2025-01-29T21:29:40.909097",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.linkedin.com/pulse/deepseek-r1-next-leap-ai-reasoning-logical-inference-pandiya-fwlqe\n",
"metadata": "https://www.linkedin.com/pulse/deepseek-r1-next-leap-ai-reasoning-logical-inference-pandiya-fwlqe"
}
},
{
"timestamp": "2025-01-29T21:29:40.919917",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.youtube.com/watch?v=sRxQBmHNbnU\n",
"metadata": "https://www.youtube.com/watch?v=sRxQBmHNbnU"
}
},
{
"timestamp": "2025-01-29T21:29:40.927091",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\n",
"metadata": "https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/"
}
},
{
"timestamp": "2025-01-29T21:29:40.946773",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://play.ht/blog/deepseek-vs-claude-vs-llama-vs-chatgpt/\n",
"metadata": "https://play.ht/blog/deepseek-vs-claude-vs-llama-vs-chatgpt/"
}
},
{
"timestamp": "2025-01-29T21:29:40.957398",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:40.966575",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 5 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:45.925916",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 3 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:45.939560",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 1 new images from 1 total images",
"metadata": [
"https://www.innobu.com/wp-content/uploads/2025/01/n13ls_Open-Source_AI_-ar_169_-profile_i18tzau_-stylize_0_-_e68ad48f-3546-4573-9128-5d5367342248_0.jpg"
]
}
},
{
"timestamp": "2025-01-29T21:29:45.949045",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:45.963743",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: \"Deepseek v3 R1\" model architecture and performance benchmarks...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:46.132198",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.techmeme.com/250127/p29\n",
"metadata": "https://www.techmeme.com/250127/p29"
}
},
{
"timestamp": "2025-01-29T21:29:46.146490",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://x.com/patrickmoorhead?lang=en\n",
"metadata": "https://x.com/patrickmoorhead?lang=en"
}
},
{
"timestamp": "2025-01-29T21:29:46.161974",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.reddit.com/r/LLMDevs/top/\n",
"metadata": "https://www.reddit.com/r/LLMDevs/top/"
}
},
{
"timestamp": "2025-01-29T21:29:46.183865",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\n",
"metadata": "https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost"
}
},
{
"timestamp": "2025-01-29T21:29:46.202194",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.techmeme.com/250127/p32\n",
"metadata": "https://www.techmeme.com/250127/p32"
}
},
{
"timestamp": "2025-01-29T21:29:46.225817",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:46.240934",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 5 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:52.998526",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 4 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:53.024047",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 2 new images from 2 total images",
"metadata": [
"https://arbisoft.com/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fcontact.c5602fd6.png&w=1440&q=75",
"https://arbisoft.com/_next/image?url=https%3A%2F%2Fd1foa0aaimjyw4.cloudfront.net%2FBlog_Image_1_b56afb0c54.png&w=1920&q=75"
]
}
},
{
"timestamp": "2025-01-29T21:29:53.047557",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:53.060080",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: \"Deepseek v3 R1\" open-source code and community contributions analysis...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:53.854500",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\n",
"metadata": "https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7"
}
},
{
"timestamp": "2025-01-29T21:29:54.156370",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.reddit.com/r/swingtrading/comments/1ib4deg/let_me_cut_through_the_deepseek_fud_and_give_you/\n",
"metadata": "https://www.reddit.com/r/swingtrading/comments/1ib4deg/let_me_cut_through_the_deepseek_fud_and_give_you/"
}
},
{
"timestamp": "2025-01-29T21:29:54.391109",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:54.412873",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 2 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:55.991379",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 2 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:56.009580",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 0 new images from 0 total images",
"metadata": []
}
},
{
"timestamp": "2025-01-29T21:29:56.021647",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:56.034146",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: \"Deepseek v3 R1\" training costs and resource efficiency compared to competitors...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:56.326926",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.barchart.com/story/news/30627491/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today\n",
"metadata": "https://www.barchart.com/story/news/30627491/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today"
}
},
{
"timestamp": "2025-01-29T21:29:56.427207",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\n",
"metadata": "https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/"
}
},
{
"timestamp": "2025-01-29T21:29:57.107052",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://mashable.com/article/what-ai-experts-saying-about-deepseek-r1\n",
"metadata": "https://mashable.com/article/what-ai-experts-saying-about-deepseek-r1"
}
},
{
"timestamp": "2025-01-29T21:29:57.124851",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:29:57.140154",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 3 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:30:00.692164",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 3 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:30:00.701689",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 1 new images from 1 total images",
"metadata": [
"https://helios-i.mashable.com/imagery/articles/01ywQklBcfNJQHo7KRl3DJe/hero-image.fill.size_1248x702.v1738094497.jpg"
]
}
},
{
"timestamp": "2025-01-29T21:30:00.716319",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:30:00.729140",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: Impact of \"Deepseek v3 R1\" on AI industry competition and market share...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:30:03.548192",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: Multi-Head Latent Attention (MLA): Enhanced the model\u2019s ability to process nuanced relationships and manage multiple inputs simultaneously, making it highly effective for tasks requiring contextual depth.\nWhile overshadowed by high-profile releases from OpenAI and Meta, DeepSeek V3 quietly gained respect in research circles for its combination of scale, cost efficiency, and architectural innovation. It also laid the technical foundation for DeepSeek\u2019s most significant achievement to date: DeepSeek R1..\nDeepSeek took its boldest step yet with DeepSeek R1, launched on January 21, 2025. This open-source AI model has become the startup\u2019s most serious challenge to American tech giants, owing to its formidable reasoning power, lower operating costs, and developer-friendly features.\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: What\u2019s Next for DeepSeek\nDeepSeek\u2019s rapid rise comes with challenges that could shape its future. U.S. export controls restrict access to advanced GPUs, creating a compute gap that could hinder its ability to scale models like R1. While its MoE architecture maximizes efficiency, competing with firms that have access to cutting-edge hardware may become more difficult over time.\nDeepSeek also faces hurdles in market perception. To gain international trust, it must consistently prove its reliability, especially for enterprise-grade deployments. Meanwhile, the fast-evolving AI landscape means competitors like OpenAI or Meta could outpace it with new innovations. Additionally, operating under Chinese regulatory frameworks imposes content restrictions that may limit its appeal in open markets.\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: about AI development. Models like DeepSeek V3 and the groundbreaking DeepSeek R1 prove that success in AI doesn\u2019t always require billion-dollar budgets. Instead, efficiency, adaptability, and strategic partnerships can deliver results that rival even the most expensive models. What makes DeepSeek\u2019s journey even more extraordinary is the sheer shock it has generated within the AI community. Industry experts and researchers have been vocal about their amazement at how a smaller player has managed to compete with\u2014and even outperform\u2014some of the most advanced models developed by vastly better-funded organizations. DeepSeek is showing no signs of slowing down. Its recent launch of DeepThink + Web Search, which enables real-time online lookups, places it ahead of even OpenAI in some capabilities. Looking forward, the company is likely to focus on: Refining reinforcement learning pipelines to further enhance reasoning capabilities. 
Developing industry-specific models tailored for fields like\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: and competitive capabilities, DeepSeek has managed to thrive in a market dominated by tech giants, proving that innovation and efficiency can rival even the largest budgets. What\u2019s Next for DeepSeek DeepSeek\u2019s rapid rise comes with challenges that could shape its future. U.S. export controls restrict access to advanced GPUs, creating a compute gap that could hinder its ability to scale models like R1. While its MoE architecture maximizes efficiency, competing with firms that have access to cutting-edge hardware may become more difficult over time. DeepSeek also faces hurdles in market perception. To gain international trust, it must consistently prove its reliability, especially for enterprise-grade deployments. Meanwhile, the fast-evolving AI landscape means competitors like OpenAI or Meta could outpace it with new innovations. Additionally, operating under Chinese regulatory frameworks imposes content restrictions that may limit its appeal in open markets. Despite these challenges,\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: and competitive capabilities, DeepSeek has managed to thrive in a market dominated by tech giants, proving that innovation and efficiency can rival even the largest budgets. What\u2019s Next for DeepSeek DeepSeek\u2019s rapid rise comes with challenges that could shape its future. U.S. export controls restrict access to advanced GPUs, creating a compute gap that could hinder its ability to scale models like R1. While its MoE architecture maximizes efficiency, competing with firms that have access to cutting-edge hardware may become more difficult over time. DeepSeek also faces hurdles in market perception. To gain international trust, it must consistently prove its reliability, especially for enterprise-grade deployments. Meanwhile, the fast-evolving AI landscape means competitors like OpenAI or Meta could outpace it with new innovations. Additionally, operating under Chinese regulatory frameworks imposes content restrictions that may limit its appeal in open markets. Despite these challenges,\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: and competitive capabilities, DeepSeek has managed to thrive in a market dominated by tech giants, proving that innovation and efficiency can rival even the largest budgets. What\u2019s Next for DeepSeek DeepSeek\u2019s rapid rise comes with challenges that could shape its future. U.S. export controls restrict access to advanced GPUs, creating a compute gap that could hinder its ability to scale models like R1. While its MoE architecture maximizes efficiency, competing with firms that have access to cutting-edge hardware may become more difficult over time. DeepSeek also faces hurdles in market perception. To gain international trust, it must consistently prove its reliability, especially for enterprise-grade deployments. 
Meanwhile, the fast-evolving AI landscape means competitors like OpenAI or Meta could outpace it with new innovations. Additionally, operating under Chinese regulatory frameworks imposes content restrictions that may limit its appeal in open markets. Despite these challenges,\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: and competitive capabilities, DeepSeek has managed to thrive in a market dominated by tech giants, proving that innovation and efficiency can rival even the largest budgets. What\u2019s Next for DeepSeek DeepSeek\u2019s rapid rise comes with challenges that could shape its future. U.S. export controls restrict access to advanced GPUs, creating a compute gap that could hinder its ability to scale models like R1. While its MoE architecture maximizes efficiency, competing with firms that have access to cutting-edge hardware may become more difficult over time. DeepSeek also faces hurdles in market perception. To gain international trust, it must consistently prove its reliability, especially for enterprise-grade deployments. Meanwhile, the fast-evolving AI landscape means competitors like OpenAI or Meta could outpace it with new innovations. Additionally, operating under Chinese regulatory frameworks imposes content restrictions that may limit its appeal in open markets. Despite these challenges,\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: and competitive capabilities, DeepSeek has managed to thrive in a market dominated by tech giants, proving that innovation and efficiency can rival even the largest budgets. What\u2019s Next for DeepSeek DeepSeek\u2019s rapid rise comes with challenges that could shape its future. U.S. export controls restrict access to advanced GPUs, creating a compute gap that could hinder its ability to scale models like R1. While its MoE architecture maximizes efficiency, competing with firms that have access to cutting-edge hardware may become more difficult over time. DeepSeek also faces hurdles in market perception. To gain international trust, it must consistently prove its reliability, especially for enterprise-grade deployments. Meanwhile, the fast-evolving AI landscape means competitors like OpenAI or Meta could outpace it with new innovations. Additionally, operating under Chinese regulatory frameworks imposes content restrictions that may limit its appeal in open markets. Despite these challenges,\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: multiple inputs simultaneously, making it highly effective for tasks requiring contextual depth. While overshadowed by high-profile releases from OpenAI and Meta, DeepSeek V3 quietly gained respect in research circles for its combination of scale, cost efficiency, and architectural innovation. It also laid the technical foundation for DeepSeek\u2019s most significant achievement to date: DeepSeek R1.. DeepSeek R1 DeepSeek took its boldest step yet with DeepSeek R1, launched on January 21, 2025. 
This open-source AI model has become the startup\u2019s most serious challenge to American tech giants, owing to its formidable reasoning power, lower operating costs, and developer-friendly features. \ud83d\ude80 DeepSeek-R1 is here!\u26a1 Performance on par with OpenAI-o1\ud83d\udcd6 Fully open-source model & technical report\ud83c\udfc6 MIT licensed: Distill & commercialize freely!\ud83c\udf10 Website & API are live now! Try DeepThink at https://t.co/v1TFy7LHNy today!\ud83d\udc0b 1/n pic.twitter.com/7BlpWAPu6y\u2014 DeepSeek (@deepseek_ai) January 20, 2025 Key\n\nSource: https://felloai.com/2025/01/all-about-deepseek-the-rising-ai-powerhouse-challenging-industry-giants/\nTitle: All About DeepSeek: The Rising AI Powerhouse Challenging Industry Giants | Fello\u00a0AI\nContent: Beyond MoE, Multi-Head Latent Attention (MLA) boosts the models\u2019 ability to process multiple data streams at once. By distributing focus across several \u201cattention heads,\u201d they can better identify contextual relationships and handle nuanced inputs\u2014even when processing tens of thousands of tokens in a single request.\nDeepSeek\u2019s innovations also extend to model distillation, where knowledge from its larger models is transferred to smaller, more efficient versions, such as DeepSeek-R1-Distill. These compact models retain much of the reasoning power of their larger counterparts but require significantly fewer computational resources, making advanced AI more accessible.\nReactions from the AI Community\nSeveral prominent figures in AI have weighed in on the disruptive potential of DeepSeek R1:\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:30:03.948493",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: AI enthusiasts\u2014the tools are open. The barriers are mental.\nEntrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?\nBreaking the Myth: AI Development Without Billion-Dollar Budgets\nThe belief that only tech giants can build top-tier AI models has now been completely shattered. DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.\nKey takeaways from DeepSeek\u2019s success:\nCost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.\nTechnical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.\nHardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: spent years in the system. Why not challenge it?AI enthusiasts\u2014the tools are open. The barriers are mental.Entrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?Breaking the Myth: AI Development Without Billion-Dollar BudgetsThe belief that only tech giants can build top-tier AI models has now been completely shattered. DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.Key takeaways from DeepSeek\u2019s success:Cost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.Technical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.Hardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.For those still waiting for the \u201cright conditions\u201d to innovate, DeepSeek just proved you don\u2019t need\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: spent years in the system. Why not challenge it?AI enthusiasts\u2014the tools are open. The barriers are mental.Entrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?Breaking the Myth: AI Development Without Billion-Dollar BudgetsThe belief that only tech giants can build top-tier AI models has now been completely shattered. 
DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.Key takeaways from DeepSeek\u2019s success:Cost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.Technical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.Hardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.For those still waiting for the \u201cright conditions\u201d to innovate, DeepSeek just proved you don\u2019t need\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: spent years in the system. Why not challenge it?AI enthusiasts\u2014the tools are open. The barriers are mental.Entrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?Breaking the Myth: AI Development Without Billion-Dollar BudgetsThe belief that only tech giants can build top-tier AI models has now been completely shattered. DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.Key takeaways from DeepSeek\u2019s success:Cost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.Technical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.Hardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.For those still waiting for the \u201cright conditions\u201d to innovate, DeepSeek just proved you don\u2019t need\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: spent years in the system. Why not challenge it?AI enthusiasts\u2014the tools are open. The barriers are mental.Entrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?Breaking the Myth: AI Development Without Billion-Dollar BudgetsThe belief that only tech giants can build top-tier AI models has now been completely shattered. DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.Key takeaways from DeepSeek\u2019s success:Cost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.Technical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.Hardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.For those still waiting for the \u201cright conditions\u201d to innovate, DeepSeek just proved you don\u2019t need\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: spent years in the system. Why not challenge it?AI enthusiasts\u2014the tools are open. 
The barriers are mental.Entrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?Breaking the Myth: AI Development Without Billion-Dollar BudgetsThe belief that only tech giants can build top-tier AI models has now been completely shattered. DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.Key takeaways from DeepSeek\u2019s success:Cost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.Technical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.Hardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.For those still waiting for the \u201cright conditions\u201d to innovate, DeepSeek just proved you don\u2019t need\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: spent years in the system. Why not challenge it?AI enthusiasts\u2014the tools are open. The barriers are mental.Entrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?Breaking the Myth: AI Development Without Billion-Dollar BudgetsThe belief that only tech giants can build top-tier AI models has now been completely shattered. DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.Key takeaways from DeepSeek\u2019s success:Cost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.Technical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.Hardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.For those still waiting for the \u201cright conditions\u201d to innovate, DeepSeek just proved you don\u2019t need\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: spent years in the system. Why not challenge it?AI enthusiasts\u2014the tools are open. The barriers are mental.Entrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?Breaking the Myth: AI Development Without Billion-Dollar BudgetsThe belief that only tech giants can build top-tier AI models has now been completely shattered. 
DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.Key takeaways from DeepSeek\u2019s success:Cost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.Technical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.Hardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.For those still waiting for the \u201cright conditions\u201d to innovate, DeepSeek just proved you don\u2019t need\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: spent years in the system. Why not challenge it?AI enthusiasts\u2014the tools are open. The barriers are mental.Entrepreneurship is about taking action, not waiting for permission. DeepSeek just proved what\u2019s possible. What\u2019s stopping you from being next?Breaking the Myth: AI Development Without Billion-Dollar BudgetsThe belief that only tech giants can build top-tier AI models has now been completely shattered. DeepSeek has successfully demonstrated that strategic efficiency can outperform unlimited resources.Key takeaways from DeepSeek\u2019s success:Cost-Efficiency: They trained their model for less than $6 million, while competitors spend over $100 million.Technical Performance: Independent benchmarks show DeepSeek outperforming GPT-4 in mathematics, programming, and reasoning tasks.Hardware Limitations Are an Excuse: They developed high-performing AI without access to the most advanced GPUs.For those still waiting for the \u201cright conditions\u201d to innovate, DeepSeek just proved you don\u2019t need\n\nSource: https://medium.com/@dharmatej14june2001/deepseek-just-proved-a-point-whats-stopping-you-ac9db40d0bb7\nTitle: DeepSeek Just Proved a Point\u2014What\u2019s Stopping You? | by Karri Dharma Teja | Jan, 2025 | Medium\nContent: Open in appSign upSign inWriteSign upSign inDeepSeek Just Proved a Point\u2014What\u2019s Stopping You?AI Disruption & Open-Source InnovationKarri Dharma Teja\u00b7Follow3 min read\u00b71 hour ago--ListenShareIn the world of artificial intelligence, we\u2019ve been conditioned to believe that only the biggest tech giants with billion-dollar budgets can lead innovation. Companies like OpenAI, Google, and Meta invest hundreds of millions in training AI models, pushing the boundaries of deep learning. But DeepSeek has just rewritten the rules with an approach that proves money isn\u2019t the only key to success.DeepSeek Just Proved a Point \u2014 What\u2019s Stopping You?A Chinese AI startup, DeepSeek, built an open-source large language model (LLM) that not only competes with GPT-4 but does so at a fraction of the cost. While OpenAI and Google require tens of millions of dollars to train their models, DeepSeek achieved comparable performance for under $6 million by optimizing existing open-source frameworks and using efficient\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:30:06.885979",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: Reinforcement Learning: Two RL stages to refine performance and reasoning. Supervised Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. Here\u2019s how it compares to\n\nSource: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: Reinforcement Learning: Two RL stages to refine performance and reasoning. Supervised Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. Here\u2019s how it compares to\n\nSource: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: Reinforcement Learning: Two RL stages to refine performance and reasoning. Supervised Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. 
Here\u2019s how it compares to\n\nSource: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: Reinforcement Learning: Two RL stages to refine performance and reasoning. Supervised Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. Here\u2019s how it compares to\n\nSource: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: reasoning skills. Reinforcement Learning: Two RL stages to refine performance and reasoning. Supervised Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. Here\u2019s how it\n\nSource: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: reasoning skills. Reinforcement Learning: Two RL stages to refine performance and reasoning. Supervised Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. 
Here\u2019s how it\n\nSource: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/\nTitle: Dominant Nvidia tested by DeepSeek | LinkedIn\nContent: Head of AI Security & Strategy @ Aon 1d Edited Report this post The DeepSeek AI situation in 60 seconds: 1/ \ud835\udc0c\ud835\udc28\ud835\udc1d\ud835\udc1e\ud835\udc25 \ud835\udc11\ud835\udc1e\ud835\udc25\ud835\udc1e\ud835\udc1a\ud835\udc2c\ud835\udc1e\ud835\udc2c: DeepSeek, AI startup based in China, released two major models: DeepSeek-V3, a 671-billion-parameter Mixture of Experts (MoE) base model, and DeepSeek-R1, a reasoning-focused model built upon V3. Additionally, the company has released smaller, distilled versions of these models, ranging from 1.5 billion to 70 billion parameters, to enhance accessibility across various applications. The company reports development costs of approximately $6 million, though this figure is debated given their parent company's broader AI investments. 2/ \ud835\udc0f\ud835\udc1e\ud835\udc2b\ud835\udc1f\ud835\udc28\ud835\udc2b\ud835\udc26\ud835\udc1a\ud835\udc27\ud835\udc1c\ud835\udc1e \ud835\udc1a\ud835\udc27\ud835\udc1d \ud835\udc01\ud835\udc1e\ud835\udc27\ud835\udc1c\ud835\udc21\ud835\udc26\ud835\udc1a\ud835\udc2b\ud835\udc24\ud835\udc2c: R1 demonstrates impressive performance, achieving a 79.8% Pass@1 on AIME 2024, 97.3% on MATH-500, and a 2,029 Elo rating on Codeforces, outperforming 96.3% of human participants. That said, it\u2019s worth noting that benchmarks like these can be gamed or skewed by including test data in training. Always take\n\nSource: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. Here\u2019s how it compares to OpenAI\u2019s o1-1217: CategoryBenchmarkDeepSeek-R1OpenAI o1-1217ReasoningAIME 2024\n\nSource: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. 
These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. Here\u2019s how it compares to OpenAI\u2019s o1-1217: CategoryBenchmarkDeepSeek-R1OpenAI o1-1217ReasoningAIME 2024\n\nSource: https://www.innobu.com/deepseek-r1-open-source-reasoning-model-explained-ai-guide/\nTitle: DeepSeek-R1: Open-Source Reasoning\nContent: Fine-Tuning: Final SFT stages to align the model with human preferences and optimize outputs. Architectural Advancements DeepSeek-R1 boasts an efficient design with 671 billion parameters, but only 37 billion are activated per token, thanks to its Mixture-of-Experts (MoE) architecture. Key innovations include: Multi-head Latent Attention (MLA): Reduces memory usage by compressing key-value cache size, achieving a 6.3x reduction. DeepSeekMoE Architecture: Activates only a fraction of parameters, optimizing compute efficiency. Multi-token Prediction (MTP): Improves data efficiency and inference speed by predicting multiple tokens simultaneously. These innovations allow DeepSeek-R1 to achieve performance levels comparable to OpenAI\u2019s o1 while requiring significantly less compute power. Performance Benchmarks DeepSeek-R1 excels in reasoning, mathematics, and coding tasks. Here\u2019s how it compares to OpenAI\u2019s o1-1217: CategoryBenchmarkDeepSeek-R1OpenAI o1-1217ReasoningAIME 2024\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:30:13.811575",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: Our analyst team just revealed what they believe are the 10 best stocks to buy right now. See the 10 stocks \u00bbThat led most AI hardware names, many of which were trading at high valuations, to sell off hard. But how much of a risk does this new model really pose?DeepSeek rocks the AI worldDeepSeek is a Chinese AI lab that has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer, founded in 2015 by entrepreneur Liang Wenfeng. DeepSeek released the large language model V3 in December, but on January 20, it released the \"reasoning\" version of the model, called R1. By several metrics, R1 met or exceeded the latest OpenAI reasoning model, called o1.And DeepSeek was able to do it at just a fraction of the cost of o1, with computing power limited by 2022 and 2023 U.S. trade restrictions on leading AI GPUs. We are talking about one-tenth to one-thirtieth of the cost of models built by OpenAI and other \"Magnificent Seven\" names. DeepSeek's public paper on\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: Our analyst team just revealed what they believe are the 10 best stocks to buy right now. See the 10 stocks \u00bbThat led most AI hardware names, many of which were trading at high valuations, to sell off hard. But how much of a risk does this new model really pose?DeepSeek rocks the AI worldDeepSeek is a Chinese AI lab that has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer, founded in 2015 by entrepreneur Liang Wenfeng. DeepSeek released the large language model V3 in December, but on January 20, it released the \"reasoning\" version of the model, called R1. By several metrics, R1 met or exceeded the latest OpenAI reasoning model, called o1.And DeepSeek was able to do it at just a fraction of the cost of o1, with computing power limited by 2022 and 2023 U.S. trade restrictions on leading AI GPUs. We are talking about one-tenth to one-thirtieth of the cost of models built by OpenAI and other \"Magnificent Seven\" names. DeepSeek's public paper on\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: which were trading at high valuations, to sell off hard. But how much of a risk does this new model really pose?DeepSeek rocks the AI worldDeepSeek is a Chinese AI lab that has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer, founded in 2015 by entrepreneur Liang Wenfeng. 
DeepSeek released the large language model V3 in December, but on January 20, it released the \"reasoning\" version of the model, called R1. By several metrics, R1 met or exceeded the latest OpenAI reasoning model, called o1.And DeepSeek was able to do it at just a fraction of the cost of o1, with computing power limited by 2022 and 2023 U.S. trade restrictions on leading AI GPUs. We are talking about one-tenth to one-thirtieth of the cost of models built by OpenAI and other \"Magnificent Seven\" names. DeepSeek's public paper on R1 explains how the company cleverly used a variety of hacks to derive tremendous results from limited hardware. Over the weekend, its solutions\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: which were trading at high valuations, to sell off hard. But how much of a risk does this new model really pose?DeepSeek rocks the AI worldDeepSeek is a Chinese AI lab that has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer, founded in 2015 by entrepreneur Liang Wenfeng. DeepSeek released the large language model V3 in December, but on January 20, it released the \"reasoning\" version of the model, called R1. By several metrics, R1 met or exceeded the latest OpenAI reasoning model, called o1.And DeepSeek was able to do it at just a fraction of the cost of o1, with computing power limited by 2022 and 2023 U.S. trade restrictions on leading AI GPUs. We are talking about one-tenth to one-thirtieth of the cost of models built by OpenAI and other \"Magnificent Seven\" names. DeepSeek's public paper on R1 explains how the company cleverly used a variety of hacks to derive tremendous results from limited hardware. Over the weekend, its solutions\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: which were trading at high valuations, to sell off hard. But how much of a risk does this new model really pose?DeepSeek rocks the AI worldDeepSeek is a Chinese AI lab that has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer, founded in 2015 by entrepreneur Liang Wenfeng. DeepSeek released the large language model V3 in December, but on January 20, it released the \"reasoning\" version of the model, called R1. By several metrics, R1 met or exceeded the latest OpenAI reasoning model, called o1.And DeepSeek was able to do it at just a fraction of the cost of o1, with computing power limited by 2022 and 2023 U.S. trade restrictions on leading AI GPUs. We are talking about one-tenth to one-thirtieth of the cost of models built by OpenAI and other \"Magnificent Seven\" names. DeepSeek's public paper on R1 explains how the company cleverly used a variety of hacks to derive tremendous results from limited hardware. 
Over the weekend, its solutions\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: which were trading at high valuations, to sell off hard. But how much of a risk does this new model really pose?DeepSeek rocks the AI worldDeepSeek is a Chinese AI lab that has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer, founded in 2015 by entrepreneur Liang Wenfeng. DeepSeek released the large language model V3 in December, but on January 20, it released the \"reasoning\" version of the model, called R1. By several metrics, R1 met or exceeded the latest OpenAI reasoning model, called o1.And DeepSeek was able to do it at just a fraction of the cost of o1, with computing power limited by 2022 and 2023 U.S. trade restrictions on leading AI GPUs. We are talking about one-tenth to one-thirtieth of the cost of models built by OpenAI and other \"Magnificent Seven\" names. DeepSeek's public paper on R1 explains how the company cleverly used a variety of hacks to derive tremendous results from limited hardware. Over the weekend, its solutions\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: EDT.The obvious reason for the sell-off is last week's release of the AI reasoning model DeepSeek V3 R1. Over the weekend, it became apparent that DeepSeek, a little-known Chinese AI lab, had built a leading frontier AI model with significantly less computing power than had been thought necessary.Where to invest $1,000 right now? Our analyst team just revealed what they believe are the 10 best stocks to buy right now. See the 10 stocks \u00bbThat led most AI hardware names, many of which were trading at high valuations, to sell off hard. But how much of a risk does this new model really pose?DeepSeek rocks the AI worldDeepSeek is a Chinese AI lab that has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer, founded in 2015 by entrepreneur Liang Wenfeng. DeepSeek released the large language model V3 in December, but on January 20, it released the \"reasoning\" version of the model, called R1. By several metrics, R1 met or exceeded the latest OpenAI\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer, founded in 2015 by entrepreneur Liang Wenfeng. DeepSeek released the large language model V3 in December, but on January 20, it released the \"reasoning\" version of the model, called R1. 
By several metrics, R1 met or exceeded the latest OpenAI reasoning model, called o1.And DeepSeek was able to do it at just a fraction of the cost of o1, with computing power limited by 2022 and 2023 U.S. trade restrictions on leading AI GPUs. We are talking about one-tenth to one-thirtieth of the cost of models built by OpenAI and other \"Magnificent Seven\" names. DeepSeek's public paper on R1 explains how the company cleverly used a variety of hacks to derive tremendous results from limited hardware. Over the weekend, its solutions withstood scrutiny by experts.In response, major AI hardware stocks are selling off. The thinking is that companies will now be able to spend less on AI infrastructure to build\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: Shares of AI-related hardware companies plunged on Monday, including Taiwan SemiconductorManufacturing(NYSE: TSM), Marvell Technology(NASDAQ: MRVL), and Arista Networks(NYSE: ANET), which fell 14.4%, 18%, and 21.9%, respectively, as of 1:11 PM EDT.The obvious reason for the sell-off is last week's release of the AI reasoning model DeepSeek V3 R1. Over the weekend, it became apparent that DeepSeek, a little-known Chinese AI lab, had built a leading frontier AI model with significantly less computing power than had been thought necessary.Where to invest $1,000 right now? Our analyst team just revealed what they believe are the 10 best stocks to buy right now. See the 10 stocks \u00bbThat led most AI hardware names, many of which were trading at high valuations, to sell off hard. But how much of a risk does this new model really pose?DeepSeek rocks the AI worldDeepSeek is a Chinese AI lab that has been in operation since 2023. It was spun out of a quantitative hedge fund called High-Flyer,\n\nSource: https://www.theglobeandmail.com/investing/markets/stocks/ANET-N/pressreleases/30627593/why-artificial-intelligence-ai-hardware-winners-taiwan-semiconductor-marvell-and-arista-networks-plunged-by-double-digits-today/\nTitle: Why Artificial Intelligence (AI) Hardware Winners Taiwan Semiconductor, Marvell, and Arista Networks Plunged by Double Digits Today - The Globe and Mail\nContent: version of the model, called R1. By several metrics, R1 met or exceeded the latest OpenAI reasoning model, called o1.And DeepSeek was able to do it at just a fraction of the cost of o1, with computing power limited by 2022 and 2023 U.S. trade restrictions on leading AI GPUs. We are talking about one-tenth to one-thirtieth of the cost of models built by OpenAI and other \"Magnificent Seven\" names. DeepSeek's public paper on R1 explains how the company cleverly used a variety of hacks to derive tremendous results from limited hardware. Over the weekend, its solutions withstood scrutiny by experts.In response, major AI hardware stocks are selling off. The thinking is that companies will now be able to spend less on AI infrastructure to build leading-edge models. Given the high valuations at which some AI hardware stocks were trading on the expectation of future growth, it's no surprise these stocks are selling off today.This could be bad news for Taiwan Semiconductor, which has a virtual\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:31:56.216008",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: had remained as open as it was initially intended, the industry might not have rushed to compete by purchasing billions of GPUs. Open-source AI could have evolved more collaboratively and steadily, potentially leading to even better advancements by now.Thomas / @distributionat: why has deepseek conquered my timeline for several days? not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. * it'sRobert Sterling / @robertmsterling: Might be a dumb question, but can't OpenAI, Anthropic, and other AI companies just incorporate the best parts of DeepSeek's source code into their code, then use the massive GPU clusters at their disposal to train models even more powerful than DeepSeek? Am I missing something?@techbrospod: Today is the perfect day to call a VC, ask for their take on DeepSeek, wait until they finish talking, then sigh,\n\nSource: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: had remained as open as it was initially intended, the industry might not have rushed to compete by purchasing billions of GPUs. Open-source AI could have evolved more collaboratively and steadily, potentially leading to even better advancements by now.Thomas / @distributionat: why has deepseek conquered my timeline for several days? not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. * it'sRobert Sterling / @robertmsterling: Might be a dumb question, but can't OpenAI, Anthropic, and other AI companies just incorporate the best parts of DeepSeek's source code into their code, then use the massive GPU clusters at their disposal to train models even more powerful than DeepSeek? Am I missing something?@techbrospod: Today is the perfect day to call a VC, ask for their take on DeepSeek, wait until they finish talking, then sigh,\n\nSource: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: had remained as open as it was initially intended, the industry might not have rushed to compete by purchasing billions of GPUs. Open-source AI could have evolved more collaboratively and steadily, potentially leading to even better advancements by now.Thomas / @distributionat: why has deepseek conquered my timeline for several days? not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. 
* it'sRobert Sterling / @robertmsterling: Might be a dumb question, but can't OpenAI, Anthropic, and other AI companies just incorporate the best parts of DeepSeek's source code into their code, then use the massive GPU clusters at their disposal to train models even more powerful than DeepSeek? Am I missing something?@techbrospod: Today is the perfect day to call a VC, ask for their take on DeepSeek, wait until they finish talking, then sigh,\n\nSource: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: had remained as open as it was initially intended, the industry might not have rushed to compete by purchasing billions of GPUs. Open-source AI could have evolved more collaboratively and steadily, potentially leading to even better advancements by now.Thomas / @distributionat: why has deepseek conquered my timeline for several days? not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. * it'sRobert Sterling / @robertmsterling: Might be a dumb question, but can't OpenAI, Anthropic, and other AI companies just incorporate the best parts of DeepSeek's source code into their code, then use the massive GPU clusters at their disposal to train models even more powerful than DeepSeek? Am I missing something?@techbrospod: Today is the perfect day to call a VC, ask for their take on DeepSeek, wait until they finish talking, then sigh,\n\nSource: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: had remained as open as it was initially intended, the industry might not have rushed to compete by purchasing billions of GPUs. Open-source AI could have evolved more collaboratively and steadily, potentially leading to even better advancements by now.Thomas / @distributionat: why has deepseek conquered my timeline for several days? not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. * it'sRobert Sterling / @robertmsterling: Might be a dumb question, but can't OpenAI, Anthropic, and other AI companies just incorporate the best parts of DeepSeek's source code into their code, then use the massive GPU clusters at their disposal to train models even more powerful than DeepSeek? Am I missing something?@techbrospod: Today is the perfect day to call a VC, ask for their take on DeepSeek, wait until they finish talking, then sigh,\n\nSource: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: had remained as open as it was initially intended, the industry might not have rushed to compete by purchasing billions of GPUs. Open-source AI could have evolved more collaboratively and steadily, potentially leading to even better advancements by now.Thomas / @distributionat: why has deepseek conquered my timeline for several days? 
not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. * it'sRobert Sterling / @robertmsterling: Might be a dumb question, but can't OpenAI, Anthropic, and other AI companies just incorporate the best parts of DeepSeek's source code into their code, then use the massive GPU clusters at their disposal to train models even more powerful than DeepSeek? Am I missing something?@techbrospod: Today is the perfect day to call a VC, ask for their take on DeepSeek, wait until they finish talking, then sigh,\n\nSource: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: More: Will Knight / Wired: DeepSeek's New AI Model Sparks Shock, Awe, and Questions From US CompetitorsEliebak / Hugging Face: Open-R1: a fully open reproduction of DeepSeek-R1Chas Danner / New York Magazine: 7 Ways to Think About the DeepSeek AI FreakoutAllie Garfinkle / Fortune: The lessons and laments of DeepSeek, according to VCs\nWill Knight / Wired: DeepSeek's New AI Model Sparks Shock, Awe, and Questions From US Competitors\nEliebak / Hugging Face: Open-R1: a fully open reproduction of DeepSeek-R1\nChas Danner / New York Magazine: 7 Ways to Think About the DeepSeek AI Freakout\nAllie Garfinkle / Fortune: The lessons and laments of DeepSeek, according to VCs\n\nSource: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: Robert Mao / @mave99a: If OpenAI had remained as open as it was initially intended, the industry might not have rushed to compete by purchasing billions of GPUs. Open-source AI could have evolved more collaboratively and steadily, potentially leading to even better advancements by now.\nThomas / @distributionat: why has deepseek conquered my timeline for several days? not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. * it's\nRobert Sterling / @robertmsterling: Might be a dumb question, but can't OpenAI, Anthropic, and other AI companies just incorporate the best parts of DeepSeek's source code into their code, then use the massive GPU clusters at their disposal to train models even more powerful than DeepSeek? Am I missing something?\n\nSource: https://www.techmeme.com/250127/p29\nTitle: Techmeme: Hands-on with DeepSeek's free chatbot: the R1 model is powerful, but suffers from rampant hallucinations and lacks some ChatGPT tools like the memory feature (Reece Rogers/Wired)\nContent: the industry might not have rushed to compete by purchasing billions of GPUs. Open-source AI could have evolved more collaboratively and steadily, potentially leading to even better advancements by now.Thomas / @distributionat: why has deepseek conquered my timeline for several days? not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. 
* it'sRobert Sterling / @robertmsterling: Might be a dumb question, but can't OpenAI, Anthropic, and other AI companies just incorporate the best parts of DeepSeek's source code into their code, then use the massive GPU clusters at their disposal to train models even more powerful than DeepSeek? Am I missing something?@techbrospod: Today is the perfect day to call a VC, ask for their take on DeepSeek, wait until they finish talking, then sigh, say \u201cyou are truly lost\u201d and hang up.Nate /\n\nSource: https://www.techmeme.com/250127/p32\nTitle: Techmeme: Sam Altman says DeepSeek's R1 is an \u201cimpressive model, particularly around what they're able to deliver for the price\u201d and OpenAI \u201cwill pull up some releases\u201d (Vlad Savov/Bloomberg)\nContent: @perplexity_ai: DeepSeek R1 is now available on Perplexity to support deep web research. This is hosted exclusively in US & EU data centers - your data never leaves Western servers. Toggle on Pro Search to try it out.\n@groqinc: \ud83d\udc40 Powered by DeepSeek on Groq!\nHaseeb / @hosseeb: There are ~200 co-authors on the DeepSeek R1 paper. The U.S. should offer each of them immediate citizenship and $20M in cash to move here.\nMiles Brundage / @miles_brundage: Wonder what Moo Deng thinks about DeepSeek\nThomas / @distributionat: why has deepseek conquered my timeline for several days? not sure, but some hypotheses - * the model is very good * anybody can talk to the model and tell that it's very good. unlike sonnet 3.5, which requires you to already know how to talk to models or talk to it a lot. * it's\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:31:56.239856",
"type": "event",
"data": {
"type": "logs",
"content": "research_step_finalized",
"output": "Finalized research step.\n\ud83d\udcb8 Total Research Costs: $0.027217059999999998",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:31:56.254294",
"type": "event",
"data": {
"type": "logs",
"content": "generating_subtopics",
"output": "\ud83c\udf33 Generating subtopics for 'Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. '...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:31:59.345822",
"type": "event",
"data": {
"type": "logs",
"content": "subtopics_generated",
"output": "\ud83d\udcca Subtopics generated for 'Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. '",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:31:59.371390",
"type": "event",
"data": {
"type": "logs",
"content": "writing_introduction",
"output": "\u270d\ufe0f Writing introduction for 'Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. '...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:13.492671",
"type": "event",
"data": {
"type": "logs",
"content": "introduction_written",
"output": "\ud83d\udcdd Introduction written for 'Give me a detailed research report about Deepseek v3 R1 model and how its impacting the AI industry. '",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:13.505330",
"type": "event",
"data": {
"type": "logs",
"content": "starting_research",
"output": "\ud83d\udd0d Starting the research task for 'DeepSeek V3 R1: Architecture and Innovations'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:13.525463",
"type": "event",
"data": {
"type": "logs",
"content": "agent_generated",
"output": "\ud83e\udd16 AI Research Agent",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:13.545153",
"type": "event",
"data": {
"type": "logs",
"content": "planning_research",
"output": "\ud83c\udf10 Browsing the web to learn more about the task: DeepSeek V3 R1: Architecture and Innovations...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:17.759483",
"type": "event",
"data": {
"type": "logs",
"content": "planning_research",
"output": "\ud83e\udd14 Planning the research strategy and subtasks...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:20.082530",
"type": "event",
"data": {
"type": "logs",
"content": "subqueries",
"output": "\ud83d\uddc2\ufe0f I will conduct my research based on the following queries: ['\"Deepseek v3 R1\" architecture innovations', '\"Deepseek v3 R1\" impact AI industry \"Multi-Head Latent Attention\"', '\"Deepseek v3 R1\" performance benchmarks comparison \"open-source\"', '\"Deepseek v3 R1\" training efficiency cost analysis \"Mixture-of-Experts\"']...",
"metadata": [
"\"Deepseek v3 R1\" architecture innovations",
"\"Deepseek v3 R1\" impact AI industry \"Multi-Head Latent Attention\"",
"\"Deepseek v3 R1\" performance benchmarks comparison \"open-source\"",
"\"Deepseek v3 R1\" training efficiency cost analysis \"Mixture-of-Experts\""
]
}
},
{
"timestamp": "2025-01-29T21:32:20.097148",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for '\"Deepseek v3 R1\" architecture innovations'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:20.113307",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for '\"Deepseek v3 R1\" impact AI industry \"Multi-Head Latent Attention\"'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:20.131185",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for '\"Deepseek v3 R1\" performance benchmarks comparison \"open-source\"'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:20.142348",
"type": "event",
"data": {
"type": "logs",
"content": "running_subquery_research",
"output": "\n\ud83d\udd0d Running research for '\"Deepseek v3 R1\" training efficiency cost analysis \"Mixture-of-Experts\"'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:23.905633",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\n",
"metadata": "https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/"
}
},
{
"timestamp": "2025-01-29T21:32:23.922704",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: http://www.google.com/search?hl=en&q=\"Deepseek+v3+R1\"+architecture+innovations\n",
"metadata": "http://www.google.com/search?hl=en&q=\"Deepseek+v3+R1\"+architecture+innovations"
}
},
{
"timestamp": "2025-01-29T21:32:23.963250",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:23.980289",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 2 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:24.674884",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 2 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:24.687172",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 4 new images from 5 total images",
"metadata": [
"https://naixianzhang.com/wp-content/uploads/2025/01/image-57.png?w=1024",
"https://naixianzhang.com/wp-content/uploads/2025/01/image-58.png?w=1024",
"https://naixianzhang.com/wp-content/uploads/2025/01/image-59.png?w=1024",
"https://naixianzhang.com/wp-content/uploads/2025/01/image-60.png?w=1024"
]
}
},
{
"timestamp": "2025-01-29T21:32:24.716437",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:24.734887",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: \"Deepseek v3 R1\" architecture innovations...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:25.018376",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.linkedin.com/pulse/deepseek-r1-next-leap-ai-reasoning-logical-inference-pandiya-fwlqe\n",
"metadata": "https://www.linkedin.com/pulse/deepseek-r1-next-leap-ai-reasoning-logical-inference-pandiya-fwlqe"
}
},
{
"timestamp": "2025-01-29T21:32:25.038585",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://x.com/JohnKuhles1966/status/1883940370964439349\n",
"metadata": "https://x.com/JohnKuhles1966/status/1883940370964439349"
}
},
{
"timestamp": "2025-01-29T21:32:25.055150",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.reddit.com/r/NVDA_Stock/rising/\n",
"metadata": "https://www.reddit.com/r/NVDA_Stock/rising/"
}
},
{
"timestamp": "2025-01-29T21:32:25.073208",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://news.ycombinator.com/item?id=42847825\n",
"metadata": "https://news.ycombinator.com/item?id=42847825"
}
},
{
"timestamp": "2025-01-29T21:32:25.090920",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.techmeme.com/250127/p14\n",
"metadata": "https://www.techmeme.com/250127/p14"
}
},
{
"timestamp": "2025-01-29T21:32:25.104341",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:25.127088",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 5 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:27.550344",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 4 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:27.566274",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 0 new images from 0 total images",
"metadata": []
}
},
{
"timestamp": "2025-01-29T21:32:27.579895",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:27.593404",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: \"Deepseek v3 R1\" training efficiency cost analysis \"Mixture-of-Experts\"...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:28.061613",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.facebook.com/groups/AIUGM/\n",
"metadata": "https://www.facebook.com/groups/AIUGM/"
}
},
{
"timestamp": "2025-01-29T21:32:28.404056",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/?utm_source=rss&utm_campaign=storylines_en\n",
"metadata": "https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/?utm_source=rss&utm_campaign=storylines_en"
}
},
{
"timestamp": "2025-01-29T21:32:28.420710",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/\n",
"metadata": "https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/"
}
},
{
"timestamp": "2025-01-29T21:32:28.439939",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:28.457880",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 3 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:29.283155",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 3 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:29.299992",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 0 new images from 0 total images",
"metadata": []
}
},
{
"timestamp": "2025-01-29T21:32:29.320931",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:29.337826",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: \"Deepseek v3 R1\" impact AI industry \"Multi-Head Latent Attention\"...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:29.435839",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://forum.cursor.com/t/cursor-deepseek/43261\n",
"metadata": "https://forum.cursor.com/t/cursor-deepseek/43261"
}
},
{
"timestamp": "2025-01-29T21:32:29.454594",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\n",
"metadata": "https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost"
}
},
{
"timestamp": "2025-01-29T21:32:29.470771",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://www.reddit.com/r/singularity/comments/1ibngb8/good_way_of_comparing_robustness_between_r1_and/\n",
"metadata": "https://www.reddit.com/r/singularity/comments/1ibngb8/good_way_of_comparing_robustness_between_r1_and/"
}
},
{
"timestamp": "2025-01-29T21:32:29.487266",
"type": "event",
"data": {
"type": "logs",
"content": "added_source_url",
"output": "\u2705 Added source url to research: https://play.ht/blog/deepseek-v3-vs-r1-vs-coder/\n",
"metadata": "https://play.ht/blog/deepseek-v3-vs-r1-vs-coder/"
}
},
{
"timestamp": "2025-01-29T21:32:29.506342",
"type": "event",
"data": {
"type": "logs",
"content": "researching",
"output": "\ud83e\udd14 Researching for relevant information across multiple sources...\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:29.519385",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_urls",
"output": "\ud83c\udf10 Scraping content from 4 URLs...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:31.488435",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_content",
"output": "\ud83d\udcc4 Scraped 4 pages of content",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:31.506663",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_images",
"output": "\ud83d\uddbc\ufe0f Selected 3 new images from 3 total images",
"metadata": [
"https://arbisoft.com/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fcontact.c5602fd6.png&w=1440&q=75",
"https://us1.discourse-cdn.com/flex020/uploads/cursor1/optimized/3X/0/d/0df9e1f23791d1b8e362cb2bcf434e2bac1e7a09_2_419x500.png",
"https://arbisoft.com/_next/image?url=https%3A%2F%2Fd1foa0aaimjyw4.cloudfront.net%2FBlog_Image_1_b56afb0c54.png&w=1920&q=75"
]
}
},
{
"timestamp": "2025-01-29T21:32:31.525780",
"type": "event",
"data": {
"type": "logs",
"content": "scraping_complete",
"output": "\ud83c\udf10 Scraping complete",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:31.543245",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_query_content",
"output": "\ud83d\udcda Getting relevant content based on query: \"Deepseek v3 R1\" performance benchmarks comparison \"open-source\"...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:31.670116",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: capable of self-improvement without human supervision, further establishing its position in the AI research community. The key innovations by deepseek? DeepSeek V3 R1 likely innovates by integrating: Weighted relevance scoring 2. Contextual Query refinement 3. Hierarchical Search Space Optimization 4. Attention driven ranking Another version from Claude 3.5 Sonnet: Searched web: deepseek r1 github code implementation reinforcement learning distillation GitHub \u2013 deepseek-ai/DeepSeek-R1 DeepSeek-R1 \u2014 Training Language Models to reason through Reinforcement \u2026 Aakash Nain \u2013 DeepSeek-R1 unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF \u2013 Hugging Face DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via DeepSeek-R1 \u2026 MoE Architecture Implementation # Pseudo-code representation of MoE layer class MoELayer: def __init__(self, num_experts=16, hidden_size=4096): self.experts = [Expert() for _ in range(num_experts)] self.router = Router(hidden_size, num_experts) def forward(self, x): # Router\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: its position in the AI research community. The key innovations by deepseek? DeepSeek V3 R1 likely innovates by integrating: Weighted relevance scoring 2. Contextual Query refinement 3. Hierarchical Search Space Optimization 4. Attention driven ranking Another version from Claude 3.5 Sonnet: Searched web: deepseek r1 github code implementation reinforcement learning distillation GitHub \u2013 deepseek-ai/DeepSeek-R1 DeepSeek-R1 \u2014 Training Language Models to reason through Reinforcement \u2026 Aakash Nain \u2013 DeepSeek-R1 unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF \u2013 Hugging Face DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via DeepSeek-R1 \u2026 MoE Architecture Implementation # Pseudo-code representation of MoE layer class MoELayer: def __init__(self, num_experts=16, hidden_size=4096): self.experts = [Expert() for _ in range(num_experts)] self.router = Router(hidden_size, num_experts) def forward(self, x): # Router selects top-k experts for each token expert_weights = self.router(x) # Shape:\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: its position in the AI research community. The key innovations by deepseek? DeepSeek V3 R1 likely innovates by integrating: Weighted relevance scoring 2. Contextual Query refinement 3. Hierarchical Search Space Optimization 4. 
Attention driven ranking Another version from Claude 3.5 Sonnet: Searched web: deepseek r1 github code implementation reinforcement learning distillation GitHub \u2013 deepseek-ai/DeepSeek-R1 DeepSeek-R1 \u2014 Training Language Models to reason through Reinforcement \u2026 Aakash Nain \u2013 DeepSeek-R1 unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF \u2013 Hugging Face DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via DeepSeek-R1 \u2026 MoE Architecture Implementation # Pseudo-code representation of MoE layer class MoELayer: def __init__(self, num_experts=16, hidden_size=4096): self.experts = [Expert() for _ in range(num_experts)] self.router = Router(hidden_size, num_experts) def forward(self, x): # Router selects top-k experts for each token expert_weights = self.router(x) # Shape:\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model\nJanuary 26, 2025January 26, 2025 / Naixian Zhang\nJanuary 26, 2025January 26, 2025\n/ Naixian Zhang\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: DeepSeek originated as a research initiative within High-Flyer, a Chinese quantitative hedge fund known for its AI-driven trading strategies. In April 2023, High-Flyer established DeepSeek as an independent entity dedicated to advancing artificial general intelligence (AGI), explicitly separating its research from the firm\u2019s financial operations. Wikipedia Since its inception, DeepSeek has developed several notable AI models. In May 2024, the company released DeepSeek-V2, which gained attention for its strong performance and cost-effectiveness, prompting competitive responses from major tech companies in China. Wikipedia More recently, in January 2025, DeepSeek introduced the R1 model, capable of self-improvement without human supervision, further establishing its position in the AI research community. The key innovations by deepseek? DeepSeek V3 R1 likely innovates by integrating: Weighted relevance scoring 2. Contextual Query refinement 3. Hierarchical Search Space Optimization 4.\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: DeepSeek originated as a research initiative within High-Flyer, a Chinese quantitative hedge fund known for its AI-driven trading strategies. In April 2023, High-Flyer established DeepSeek as an independent entity dedicated to advancing artificial general intelligence (AGI), explicitly separating its research from the firm\u2019s financial operations. Wikipedia\nSince its inception, DeepSeek has developed several notable AI models. In May 2024, the company released DeepSeek-V2, which gained attention for its strong performance and cost-effectiveness, prompting competitive responses from major tech companies in China. 
Wikipedia\nMore recently, in January 2025, DeepSeek introduced the R1 model, capable of self-improvement without human supervision, further establishing its position in the AI research community.\nThe key innovations by deepseek? DeepSeek V3 R1 likely innovates by integrating:\nWeighted relevance scoring\n2. Contextual Query refinement\n3. Hierarchical Search Space Optimization\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model January 26, 2025January 26, 2025 / Naixian Zhang DeepSeek originated as a research initiative within High-Flyer, a Chinese quantitative hedge fund known for its AI-driven trading strategies. In April 2023, High-Flyer established DeepSeek as an independent entity dedicated to advancing artificial general intelligence (AGI), explicitly separating its research from the firm\u2019s financial operations. Wikipedia Since its inception, DeepSeek has developed several notable AI models. In May 2024, the company released DeepSeek-V2, which gained attention for its strong performance and cost-effectiveness, prompting competitive responses from major tech companies in China. Wikipedia More recently, in January 2025, DeepSeek introduced the R1 model, capable of self-improvement without human supervision, further establishing its position in the AI research community. The key innovations by deepseek? DeepSeek V3 R1\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model January 26, 2025January 26, 2025 / Naixian Zhang DeepSeek originated as a research initiative within High-Flyer, a Chinese quantitative hedge fund known for its AI-driven trading strategies. In April 2023, High-Flyer established DeepSeek as an independent entity dedicated to advancing artificial general intelligence (AGI), explicitly separating its research from the firm\u2019s financial operations. Wikipedia Since its inception, DeepSeek has developed several notable AI models. In May 2024, the company released DeepSeek-V2, which gained attention for its strong performance and cost-effectiveness, prompting competitive responses from major tech companies in China. Wikipedia More recently, in January 2025, DeepSeek introduced the R1 model, capable of self-improvement without human supervision, further establishing its position in the AI research community. The key innovations by deepseek? DeepSeek V3 R1\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: Naixian Zhang Menu Skip to content Home About Contact Search Search for: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model January 26, 2025January 26, 2025 / Naixian Zhang DeepSeek originated as a research initiative within High-Flyer, a Chinese quantitative hedge fund known for its AI-driven trading strategies. 
In April 2023, High-Flyer established DeepSeek as an independent entity dedicated to advancing artificial general intelligence (AGI), explicitly separating its research from the firm\u2019s financial operations. Wikipedia Since its inception, DeepSeek has developed several notable AI models. In May 2024, the company released DeepSeek-V2, which gained attention for its strong performance and cost-effectiveness, prompting competitive responses from major tech companies in China. Wikipedia More recently, in January 2025, DeepSeek introduced the R1 model, capable of self-improvement without human supervision, further establishing its position in the\n\nSource: https://naixianzhang.com/2025/01/26/who-is-the-inventor-of-deepseek-and-what-are-the-key-innovation-of-deepseek-v3-r1-model/\nTitle: Who is the Inventor of Deepseek and What are the Key Innovation of Deepseek v3 R1 Model | Naixian Zhang\nContent: relevance scoring 2. Contextual Query refinement 3. Hierarchical Search Space Optimization 4. Attention driven ranking Another version from Claude 3.5 Sonnet: Searched web: deepseek r1 github code implementation reinforcement learning distillation GitHub \u2013 deepseek-ai/DeepSeek-R1 DeepSeek-R1 \u2014 Training Language Models to reason through Reinforcement \u2026 Aakash Nain \u2013 DeepSeek-R1 unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF \u2013 Hugging Face DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via DeepSeek-R1 \u2026 MoE Architecture Implementation # Pseudo-code representation of MoE layer class MoELayer: def __init__(self, num_experts=16, hidden_size=4096): self.experts = [Expert() for _ in range(num_experts)] self.router = Router(hidden_size, num_experts) def forward(self, x): # Router selects top-k experts for each token expert_weights = self.router(x) # Shape: [batch_size, seq_len, num_experts] top_k_weights, top_k_indices = select_top_k(expert_weights, k=2) # Only activate selected experts\n",
"metadata": null
}
},
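The MoE pseudocode quoted in the source above leaves Expert, Router, and select_top_k undefined. The following is a minimal, self-contained PyTorch sketch of the same top-k routing idea (16 experts, hidden size 4096, k=2, as in the pseudocode); it illustrates generic Mixture-of-Experts routing rather than DeepSeek's actual implementation, and the expert MLP shape and linear router are assumptions made only to produce a runnable example.

```python
# Illustrative top-k Mixture-of-Experts routing (a sketch, not DeepSeek's code).
import torch
import torch.nn as nn
import torch.nn.functional as F

class Expert(nn.Module):
    """A small feed-forward block standing in for a full expert MLP (assumed shape)."""
    def __init__(self, hidden_size: int):
        super().__init__()
        self.ff = nn.Sequential(
            nn.Linear(hidden_size, 4 * hidden_size),
            nn.GELU(),
            nn.Linear(4 * hidden_size, hidden_size),
        )

    def forward(self, x):
        return self.ff(x)

class MoELayer(nn.Module):
    """Routes each token to its top-k experts and mixes their outputs."""
    def __init__(self, num_experts: int = 16, hidden_size: int = 4096, k: int = 2):
        super().__init__()
        self.experts = nn.ModuleList([Expert(hidden_size) for _ in range(num_experts)])
        self.router = nn.Linear(hidden_size, num_experts)  # per-expert routing logits
        self.k = k

    def forward(self, x):
        # x: [batch, seq_len, hidden_size]
        weights = F.softmax(self.router(x), dim=-1)            # [batch, seq, num_experts]
        top_w, top_idx = torch.topk(weights, self.k, dim=-1)   # keep only k experts per token
        top_w = top_w / top_w.sum(dim=-1, keepdim=True)        # renormalize kept weights

        out = torch.zeros_like(x)
        for e, expert in enumerate(self.experts):
            # Combined gate weight for expert e (zero for tokens that did not select it).
            gate = (top_w * (top_idx == e).float()).sum(dim=-1, keepdim=True)
            out = out + gate * expert(x)
        return out

# Small sizes here so the example runs on modest hardware; the class defaults above
# mirror the quoted pseudocode (16 experts, hidden size 4096, k=2).
layer = MoELayer(num_experts=8, hidden_size=128, k=2)
tokens = torch.randn(2, 16, 128)
print(layer(tokens).shape)  # torch.Size([2, 16, 128])
```

Note that, for clarity, this sketch runs every expert over every token and then masks the results; production MoE layers dispatch only the routed tokens to each expert, which is where the compute savings actually come from.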
{
"timestamp": "2025-01-29T21:32:44.378661",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/\nTitle: Dominant Nvidia tested by DeepSeek | LinkedIn\nContent: attention (MHLA) for reduced memory usage, mixed precision computation on FP8, and a post-training re-architecture. While MoE models inherently offer efficiency advantages, V3's performance is particularly noteworthy. DeepSeek-R1 reinforcement learning reasoning model is benchmarked against OpenAI's GPT models. A key claim is that DeepSeek-R1 has eliminated the need for supervised fine-tuning, suggesting a novel approach to training. While DeepSeek's models demonstrate impressive performance and incorporate innovative techniques, including more standard optimization methods, there's ongoing discussion about the extent of DeepSeek's original contributions versus their reliance on existing open-source LLMs. The emergence of a low-cost, high-performing AI tool from China has sparked discussions on Wall Street about the long-term impact on the AI market and whether U.S. companies are overspending on AI development. Despite the technological advancements, some analysts like Wedbush's Ives\n\nSource: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/?utm_source=rss&utm_campaign=storylines_en\nTitle: Dominant Nvidia tested by DeepSeek | LinkedIn\nContent: attention (MHLA) for reduced memory usage, mixed precision computation on FP8, and a post-training re-architecture. While MoE models inherently offer efficiency advantages, V3's performance is particularly noteworthy. DeepSeek-R1 reinforcement learning reasoning model is benchmarked against OpenAI's GPT models. A key claim is that DeepSeek-R1 has eliminated the need for supervised fine-tuning, suggesting a novel approach to training. While DeepSeek's models demonstrate impressive performance and incorporate innovative techniques, including more standard optimization methods, there's ongoing discussion about the extent of DeepSeek's original contributions versus their reliance on existing open-source LLMs. The emergence of a low-cost, high-performing AI tool from China has sparked discussions on Wall Street about the long-term impact on the AI market and whether U.S. companies are overspending on AI development. Despite the technological advancements, some analysts like Wedbush's Ives\n\nSource: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/\nTitle: Dominant Nvidia tested by DeepSeek | LinkedIn\nContent: January 20th that rivals OpenAI's ChatGPT and Meta's Llama 3.1. DeepSeek utilizes \"inference-time computing,\" activating only necessary parts of its model for each query, which is more cost and energy efficient. This has garnered praise from tech figures like Marc Andreessen, who called it a \"profound gift to the world.\" DeepSeek, a Chinese AI startup specializing in open-source large language models (LLMs), has released two notable models: DeepSeek-V3 and DeepSeek-R1. DeepSeek-V3 LLM utilizes a Mixture of Experts (MoE) architecture, combining several smaller models with a total of 671 billion parameters, but activating only 37 billion parameters for each token during inference. This approach significantly enhances efficiency, estimated to be 10x better than some peers and 3-7x better considering other innovations. 
V3 incorporates further advancements like multi-head latent attention (MHLA) for reduced memory usage, mixed precision computation on FP8, and a post-training\n\nSource: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/?utm_source=rss&utm_campaign=storylines_en\nTitle: Dominant Nvidia tested by DeepSeek | LinkedIn\nContent: January 20th that rivals OpenAI's ChatGPT and Meta's Llama 3.1. DeepSeek utilizes \"inference-time computing,\" activating only necessary parts of its model for each query, which is more cost and energy efficient. This has garnered praise from tech figures like Marc Andreessen, who called it a \"profound gift to the world.\" DeepSeek, a Chinese AI startup specializing in open-source large language models (LLMs), has released two notable models: DeepSeek-V3 and DeepSeek-R1. DeepSeek-V3 LLM utilizes a Mixture of Experts (MoE) architecture, combining several smaller models with a total of 671 billion parameters, but activating only 37 billion parameters for each token during inference. This approach significantly enhances efficiency, estimated to be 10x better than some peers and 3-7x better considering other innovations. V3 incorporates further advancements like multi-head latent attention (MHLA) for reduced memory usage, mixed precision computation on FP8, and a post-training\n\nSource: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/\nTitle: Dominant Nvidia tested by DeepSeek | LinkedIn\nContent: advantages, V3's performance is particularly noteworthy. DeepSeek-R1 reinforcement learning reasoning model is benchmarked against OpenAI's GPT models. A key claim is that DeepSeek-R1 has eliminated the need for supervised fine-tuning, suggesting a novel approach to training. While DeepSeek's models demonstrate impressive performance and incorporate innovative techniques, including more standard optimization methods, there's ongoing discussion about the extent of DeepSeek's original contributions versus their reliance on existing open-source LLMs. The emergence of a low-cost, high-performing AI tool from China has sparked discussions on Wall Street about the long-term impact on the AI market and whether U.S. companies are overspending on AI development. Despite the technological advancements, some analysts like Wedbush's Ives express doubt about DeepSeek's adoption by major U.S. businesses, citing concerns about using a Chinese startup for critical AI infrastructure and Nvidia's\n\nSource: https://www.linkedin.com/news/story/dominant-nvidia-tested-by-deepseek-7138610/\nTitle: Dominant Nvidia tested by DeepSeek | LinkedIn\nContent: some peers and 3-7x better considering other innovations. V3 incorporates further advancements like multi-head latent attention (MHLA) for reduced memory usage, mixed precision computation on FP8, and a post-training re-architecture. While MoE models inherently offer efficiency advantages, V3's performance is particularly noteworthy. DeepSeek-R1 reinforcement learning reasoning model is benchmarked against OpenAI's GPT models. A key claim is that DeepSeek-R1 has eliminated the need for supervised fine-tuning, suggesting a novel approach to training. While DeepSeek's models demonstrate impressive performance and incorporate innovative techniques, including more standard optimization methods, there's ongoing discussion about the extent of DeepSeek's original contributions versus their reliance on existing open-source LLMs. The emergence of a low-cost, high-performing AI tool from China has sparked discussions on Wall Street about the long-term impact on the AI market and whether U.S.\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:32:49.668163",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\nTitle: DeepSeek-R1 - The Chinese AI Powerhouse Outperforming OpenAI's o1 \u2014 at 95% Less Cost\nContent: comparison breakdown:1. PerformanceDeepSeek-R1 outperforms OpenAI-o1 on key benchmarks like AIME, MATH-500, and SWE-bench.Demonstrates higher accuracy and faster response times for complex problem-solving tasks.Excels in logical reasoning and analytical capabilities, making it ideal for reasoning-focused applications. 2. Development Time and CostDeepSeek-R1 was developed significantly faster due to optimized training techniques and efficient computation.Its development cost is 90-95% lower than OpenAI-o1, reducing dependency on expensive GPU clusters.OpenAI-o1 required years of high-cost iterative training with massive datasets and computational resources. 3. Cost of UseDue to its high cost, OpenAI-o1 is only available to large businesses with sizable budgets.DeepSeek-R1 offers comparable performance at a fraction of the cost, making it affordable for startups and developers.R1 is open-source and available under an MIT license on Hugging Face, democratizing advanced AI. 4. Technical\n\nSource: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\nTitle: DeepSeek-R1 - The Chinese AI Powerhouse Outperforming OpenAI's o1 \u2014 at 95% Less Cost\nContent: 4. Technical Efficiency\nDeepSeek-R1 uses an optimized architecture to handle complex reasoning with fewer GPUs and lower energy consumption.\nIts computation-focused design allows for \u201clonger thinking\u201d without resource-intensive processes.\nOpenAI-o1 relies on vast computational power for fine-tuning, which means higher costs and less scalability.\nDeepSeek-R1 Benchmark Brilliance\nDeepSeek-R1 has delivered impressive results across multiple benchmarks, demonstrating its competitive edge in various domains:\nMathematics - Achieved 79.8% (Pass@1) on AIME 2024 and an outstanding 93% on MATH-500.\nCoding - Ranked in the 96.3rd percentile on Codeforces.\nGeneral Knowledge - Scored 90.8% on MMLU and 71.5% on GPQA Diamond.\nWriting - Secured 87.6% on AlpacaEval 2.0 for question answering.\nThese numbers place DeepSeek-R1 in line with industry leaders, like OpenAI and Meta. In some areas, it even surpasses them, proving that open-source models can punch above their weight.\n\nSource: https://forum.cursor.com/t/cursor-deepseek/43261\nTitle: Cursor & DeepSeek - Feature Requests - Cursor - Community Forum\nContent: Deepseek R1 is performing much better than sonnet 3.5 so far, I have been playing with it and it\u2019s great. I switch back to sonnet and it feel dumb compared to it. 
I think deepseek R1 is the new king for coding.\ntemp-sunsetmango January 25, 2025, 1:47am 8 if they make any money off the 40% requests they need to seriously reconsider that I just blew through $20 of fast o1 and got almsot nothing done but deepseek is doing it better for free or am i just idk singularity\ntemp-sunsetmango January 25, 2025, 1:47am 8\nJanuary 25, 2025, 1:47am 8\nif they make any money off the 40% requests they need to seriously reconsider that I just blew through $20 of fast o1 and got almsot nothing done but deepseek is doing it better for free or am i just idk singularity\nif they make any money off the 40% requests they need to seriously reconsider that\nI just blew through $20 of fast o1 and got almsot nothing done\nbut deepseek is doing it better for free\nor am i just idk singularity\n\nSource: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\nTitle: DeepSeek-R1 - The Chinese AI Powerhouse Outperforming OpenAI's o1 \u2014 at 95% Less Cost\nContent: Demonstrates higher accuracy and faster response times for complex problem-solving tasks.\nExcels in logical reasoning and analytical capabilities, making it ideal for reasoning-focused applications.\n2. Development Time and Cost\nDeepSeek-R1 was developed significantly faster due to optimized training techniques and efficient computation.\nIts development cost is 90-95% lower than OpenAI-o1, reducing dependency on expensive GPU clusters.\nOpenAI-o1 required years of high-cost iterative training with massive datasets and computational resources.\n3. Cost of Use\nDue to its high cost, OpenAI-o1 is only available to large businesses with sizable budgets.\nDeepSeek-R1 offers comparable performance at a fraction of the cost, making it affordable for startups and developers.\nR1 is open-source and available under an MIT license on Hugging Face, democratizing advanced AI.\n4. Technical Efficiency\n\nSource: https://play.ht/blog/deepseek-v3-vs-r1-vs-coder/\nTitle: DeepSeek-V3, R1, Coder: What's The Differences?\nContent: DeepSeek-V3, R1, Coder: What\u2019s the Differences?\nDeepSeek-V3, R1, Coder: What\u2019s the Differences? With a lot of buzz around DeepSeek-V3, R1, Coder, just what are the Differences between each? in API January 27, 2025 4 min read Share this post\nDeepSeek-V3, R1, Coder: What\u2019s the Differences? With a lot of buzz around DeepSeek-V3, R1, Coder, just what are the Differences between each? in API January 27, 2025 4 min read Share this post\nDeepSeek-V3, R1, Coder: What\u2019s the Differences? With a lot of buzz around DeepSeek-V3, R1, Coder, just what are the Differences between each? in API January 27, 2025 4 min read\nDeepSeek-V3, R1, Coder: What\u2019s the Differences? With a lot of buzz around DeepSeek-V3, R1, Coder, just what are the Differences between each?\nWith a lot of buzz around DeepSeek-V3, R1, Coder, just what are the Differences between each?\nin API January 27, 2025 4 min read\nin API January 27, 2025 4 min read\nJanuary 27, 2025 4 min read\nShare this post\nShare this post\nShare this post\n\nSource: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\nTitle: DeepSeek-R1 - The Chinese AI Powerhouse Outperforming OpenAI's o1 \u2014 at 95% Less Cost\nContent: and available under an MIT license on Hugging Face, democratizing advanced AI. 4. 
Technical EfficiencyDeepSeek-R1 uses an optimized architecture to handle complex reasoning with fewer GPUs and lower energy consumption.Its computation-focused design allows for \u201clonger thinking\u201d without resource-intensive processes.OpenAI-o1 relies on vast computational power for fine-tuning, which means higher costs and less scalability. DeepSeek-R1 Benchmark BrillianceDeepSeek-R1 has delivered impressive results across multiple benchmarks, demonstrating its competitive edge in various domains: Mathematics - Achieved 79.8% (Pass@1) on AIME 2024 and an outstanding 93% on MATH-500.Coding - Ranked in the 96.3rd percentile on Codeforces.General Knowledge - Scored 90.8% on MMLU and 71.5% on GPQA Diamond.Writing - Secured 87.6% on AlpacaEval 2.0 for question answering. These numbers place DeepSeek-R1 in line with industry leaders, like OpenAI and Meta. In some areas, it even surpasses them, proving that\n\nSource: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\nTitle: DeepSeek-R1 - The Chinese AI Powerhouse Outperforming OpenAI's o1 \u2014 at 95% Less Cost\nContent: it even more disruptive. DeepSeek isn\u2019t just competing\u2014it\u2019s rewriting the AI playbook. The Training ApproachUnlike traditional models, DeepSeek-R1 skips the usual supervised fine-tuning (SFT) and dives straight into reinforcement learning. This means it learns to reason and solve problems independently\u2014no hand-holding required. The result is a model that is capable of self-verification, reflection, and creating detailed chain-of-thought (CoT) responses. This isn\u2019t just theory. DeepSeek-R1 is the first open-source model to prove that advanced reasoning in large language models can be achieved purely with RL. It\u2019s a game-changer for researchers and developers looking to push the boundaries of what AI can do. DeepSeek-R1 Vs OpenAI-O1 Let\u2019s take a closer look at how DeepSeek-R1 stacks up against OpenAI-o1 in terms of various benchmarks in this graph: Source: HuggingFace Here is a further comparison breakdown:1. PerformanceDeepSeek-R1 outperforms OpenAI-o1 on key benchmarks like AIME,\n\nSource: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\nTitle: DeepSeek-R1 - The Chinese AI Powerhouse Outperforming OpenAI's o1 \u2014 at 95% Less Cost\nContent: capabilities, making it ideal for reasoning-focused applications. 2. Development Time and CostDeepSeek-R1 was developed significantly faster due to optimized training techniques and efficient computation.Its development cost is 90-95% lower than OpenAI-o1, reducing dependency on expensive GPU clusters.OpenAI-o1 required years of high-cost iterative training with massive datasets and computational resources. 3. Cost of UseDue to its high cost, OpenAI-o1 is only available to large businesses with sizable budgets.DeepSeek-R1 offers comparable performance at a fraction of the cost, making it affordable for startups and developers.R1 is open-source and available under an MIT license on Hugging Face, democratizing advanced AI. 4. 
Technical EfficiencyDeepSeek-R1 uses an optimized architecture to handle complex reasoning with fewer GPUs and lower energy consumption.Its computation-focused design allows for \u201clonger thinking\u201d without resource-intensive processes.OpenAI-o1 relies on vast\n\nSource: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\nTitle: DeepSeek-R1 - The Chinese AI Powerhouse Outperforming OpenAI's o1 \u2014 at 95% Less Cost\nContent: design allows for \u201clonger thinking\u201d without resource-intensive processes.OpenAI-o1 relies on vast computational power for fine-tuning, which means higher costs and less scalability. DeepSeek-R1 Benchmark BrillianceDeepSeek-R1 has delivered impressive results across multiple benchmarks, demonstrating its competitive edge in various domains: Mathematics - Achieved 79.8% (Pass@1) on AIME 2024 and an outstanding 93% on MATH-500.Coding - Ranked in the 96.3rd percentile on Codeforces.General Knowledge - Scored 90.8% on MMLU and 71.5% on GPQA Diamond.Writing - Secured 87.6% on AlpacaEval 2.0 for question answering. These numbers place DeepSeek-R1 in line with industry leaders, like OpenAI and Meta. In some areas, it even surpasses them, proving that open-source models can punch above their weight. DeepSeek\u2019s founder, Liang Wenfeng is said to be one of the few who puts right and wrong before profits and losses. He explains, \u201cIf the goal is to make applications, using the Llama structure for\n\nSource: https://arbisoft.com/blogs/deep-seek-r1-the-chinese-ai-powerhouse-outperforming-open-ai-s-o1-at-95-less-cost\nTitle: DeepSeek-R1 - The Chinese AI Powerhouse Outperforming OpenAI's o1 \u2014 at 95% Less Cost\nContent: means it learns to reason and solve problems independently\u2014no hand-holding required. The result is a model that is capable of self-verification, reflection, and creating detailed chain-of-thought (CoT) responses. This isn\u2019t just theory. DeepSeek-R1 is the first open-source model to prove that advanced reasoning in large language models can be achieved purely with RL. It\u2019s a game-changer for researchers and developers looking to push the boundaries of what AI can do. DeepSeek-R1 Vs OpenAI-O1 Let\u2019s take a closer look at how DeepSeek-R1 stacks up against OpenAI-o1 in terms of various benchmarks in this graph: Source: HuggingFace Here is a further comparison breakdown:1. PerformanceDeepSeek-R1 outperforms OpenAI-o1 on key benchmarks like AIME, MATH-500, and SWE-bench.Demonstrates higher accuracy and faster response times for complex problem-solving tasks.Excels in logical reasoning and analytical capabilities, making it ideal for reasoning-focused applications. 2. Development Time and\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:39.167438",
"type": "event",
"data": {
"type": "logs",
"content": "subquery_context_window",
"output": "\ud83d\udcc3 Source: https://www.linkedin.com/pulse/deepseek-r1-next-leap-ai-reasoning-logical-inference-pandiya-fwlqe\nTitle: DeepSeek-R1: The Next Leap in AI Reasoning and Logical Inference\nContent: Like its predecessor DeepSeek-V3, R1 utilizes a Mixture-of-Experts (MoE) architecture, which allows it to activate only a portion of its network per query. The benefits include:\nLike its predecessor DeepSeek-V3, R1 utilizes a\narchitecture, which allows it to\nactivate only a portion of its network per query\n. The benefits include:\nLower computational costs during inference. Higher efficiency in processing reasoning tasks. The ability to scale effectively without requiring expensive hardware upgrades.\nLower computational costs during inference. Higher efficiency in processing reasoning tasks. The ability to scale effectively without requiring expensive hardware upgrades.\nLower computational costs during inference. Higher efficiency in processing reasoning tasks. The ability to scale effectively without requiring expensive hardware upgrades.\nLower computational costs during inference.\nLower computational costs\nHigher efficiency in processing reasoning tasks.\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We have the model, its a 35b active, 640b Mixture of Experts We know that spec is 2-3m hours to train Models get worse with more compute after a certain point! https://www.harmdevries.com/ ... [image]Jaana Dogan / @rakyll: DeepSeek codebases are clean and well authored. I learned a lot by reading their work just over the weekend. You cannot deny that they are raising the bar, and wish we focus on quality instead of short sighted incremental work.@wordgrammer: Okay. Thanks for the nerd snipe guys. I spent the day learning exactly how DeepSeek trained at 1/30 the price, instead of working on my pitch deck. The tl;dr to everything, according to their papers:Karma / @0xkarmatic: The visible chains of thought in DeepSeek r1 makes it so easy to prompt it as you can clearly tell when your instructions were ambiguous. Missed opportunity from OpenAI to make their COTs visible. Now that the genie is out\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We have the model, its a 35b active, 640b Mixture of Experts We know that spec is 2-3m hours to train Models get worse with more compute after a certain point! https://www.harmdevries.com/ ... [image]Jaana Dogan / @rakyll: DeepSeek codebases are clean and well authored. I learned a lot by reading their work just over the weekend. You cannot deny that they are raising the bar, and wish we focus on quality instead of short sighted incremental work.@wordgrammer: Okay. Thanks for the nerd snipe guys. I spent the day learning exactly how DeepSeek trained at 1/30 the price, instead of working on my pitch deck. 
The tl;dr to everything, according to their papers:Karma / @0xkarmatic: The visible chains of thought in DeepSeek r1 makes it so easy to prompt it as you can clearly tell when your instructions were ambiguous. Missed opportunity from OpenAI to make their COTs visible. Now that the genie is out\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We have the model, its a 35b active, 640b Mixture of Experts We know that spec is 2-3m hours to train Models get worse with more compute after a certain point! https://www.harmdevries.com/ ... [image]Jaana Dogan / @rakyll: DeepSeek codebases are clean and well authored. I learned a lot by reading their work just over the weekend. You cannot deny that they are raising the bar, and wish we focus on quality instead of short sighted incremental work.@wordgrammer: Okay. Thanks for the nerd snipe guys. I spent the day learning exactly how DeepSeek trained at 1/30 the price, instead of working on my pitch deck. The tl;dr to everything, according to their papers:Karma / @0xkarmatic: The visible chains of thought in DeepSeek r1 makes it so easy to prompt it as you can clearly tell when your instructions were ambiguous. Missed opportunity from OpenAI to make their COTs visible. Now that the genie is out\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We have the model, its a 35b active, 640b Mixture of Experts We know that spec is 2-3m hours to train Models get worse with more compute after a certain point! https://www.harmdevries.com/ ... [image]Jaana Dogan / @rakyll: DeepSeek codebases are clean and well authored. I learned a lot by reading their work just over the weekend. You cannot deny that they are raising the bar, and wish we focus on quality instead of short sighted incremental work.@wordgrammer: Okay. Thanks for the nerd snipe guys. I spent the day learning exactly how DeepSeek trained at 1/30 the price, instead of working on my pitch deck. The tl;dr to everything, according to their papers:Karma / @0xkarmatic: The visible chains of thought in DeepSeek r1 makes it so easy to prompt it as you can clearly tell when your instructions were ambiguous. Missed opportunity from OpenAI to make their COTs visible. Now that the genie is out\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: @teknium1: Its crazy deepseek direct api has seemingly no rate limits of any kind\nEmad / @emostaque: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We have the model, its a 35b active, 640b Mixture of Experts We know that spec is 2-3m hours to train Models get worse with more compute after a certain point! https://www.harmdevries.com/ ... [image]\nJaana Dogan / @rakyll: DeepSeek codebases are clean and well authored. 
I learned a lot by reading their work just over the weekend. You cannot deny that they are raising the bar, and wish we focus on quality instead of short sighted incremental work.\n@wordgrammer: Okay. Thanks for the nerd snipe guys. I spent the day learning exactly how DeepSeek trained at 1/30 the price, instead of working on my pitch deck. The tl;dr to everything, according to their papers:\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: the unreleased OpenAI o3 at the same cost at coding on Codeforces and ARC-AGI! [image]Nat Friedman / @natfriedman: The deepseek team is obviously really good. China is full of talented engineers. Every other take is cope. Sorry.Morgan Brown / @morganb: 7/ The results are mind-blowing: - Training cost: $100M \u2192 $5M - GPUs needed: 100,000 \u2192 2,000 - API costs: 95% cheaper - Can run on gaming GPUs instead of data center hardwareOle Lehmann / @itsolelehmann: DeepSeek is a 100x more based name than ChatGpt or ClaudeEthan Mollick / @emollick: I think the market will adjust to any per token cost decrease brought on by DeepSeek quite quickly. Costs for GPT-4 level intelligence dropped by 1000x in the last 18 months. A 95% price drop in reasoning models seems not to be something that will break the labs.@teknium1: Its crazy deepseek direct api has seemingly no rate limits of any kindEmad / @emostaque: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: the unreleased OpenAI o3 at the same cost at coding on Codeforces and ARC-AGI! [image]Nat Friedman / @natfriedman: The deepseek team is obviously really good. China is full of talented engineers. Every other take is cope. Sorry.Morgan Brown / @morganb: 7/ The results are mind-blowing: - Training cost: $100M \u2192 $5M - GPUs needed: 100,000 \u2192 2,000 - API costs: 95% cheaper - Can run on gaming GPUs instead of data center hardwareOle Lehmann / @itsolelehmann: DeepSeek is a 100x more based name than ChatGpt or ClaudeEthan Mollick / @emollick: I think the market will adjust to any per token cost decrease brought on by DeepSeek quite quickly. Costs for GPT-4 level intelligence dropped by 1000x in the last 18 months. A 95% price drop in reasoning models seems not to be something that will break the labs.@teknium1: Its crazy deepseek direct api has seemingly no rate limits of any kindEmad / @emostaque: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: the unreleased OpenAI o3 at the same cost at coding on Codeforces and ARC-AGI! [image]Nat Friedman / @natfriedman: The deepseek team is obviously really good. China is full of talented engineers. Every other take is cope. 
Sorry.Morgan Brown / @morganb: 7/ The results are mind-blowing: - Training cost: $100M \u2192 $5M - GPUs needed: 100,000 \u2192 2,000 - API costs: 95% cheaper - Can run on gaming GPUs instead of data center hardwareOle Lehmann / @itsolelehmann: DeepSeek is a 100x more based name than ChatGpt or ClaudeEthan Mollick / @emollick: I think the market will adjust to any per token cost decrease brought on by DeepSeek quite quickly. Costs for GPT-4 level intelligence dropped by 1000x in the last 18 months. A 95% price drop in reasoning models seems not to be something that will break the labs.@teknium1: Its crazy deepseek direct api has seemingly no rate limits of any kindEmad / @emostaque: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We\n\nSource: https://www.techmeme.com/250127/p14\nTitle: Techmeme: DeepSeek could be an extinction-level event for venture capital firms that went all-in on foundational model companies; investors say they are not panicking (Dan Primack/Axios)\nContent: the unreleased OpenAI o3 at the same cost at coding on Codeforces and ARC-AGI! [image]Nat Friedman / @natfriedman: The deepseek team is obviously really good. China is full of talented engineers. Every other take is cope. Sorry.Morgan Brown / @morganb: 7/ The results are mind-blowing: - Training cost: $100M \u2192 $5M - GPUs needed: 100,000 \u2192 2,000 - API costs: 95% cheaper - Can run on gaming GPUs instead of data center hardwareOle Lehmann / @itsolelehmann: DeepSeek is a 100x more based name than ChatGpt or ClaudeEthan Mollick / @emollick: I think the market will adjust to any per token cost decrease brought on by DeepSeek quite quickly. Costs for GPT-4 level intelligence dropped by 1000x in the last 18 months. A 95% price drop in reasoning models seems not to be something that will break the labs.@teknium1: Its crazy deepseek direct api has seemingly no rate limits of any kindEmad / @emostaque: Simpler way to understand DeepSeek weren't lying about 50k H100s or training costs for V3/R1 We\n",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:39.189887",
"type": "event",
"data": {
"type": "logs",
"content": "research_step_finalized",
"output": "Finalized research step.\n\ud83d\udcb8 Total Research Costs: $0.0208465",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:39.208249",
"type": "event",
"data": {
"type": "logs",
"content": "generating_draft_sections",
"output": "\ud83d\udcd1 Generating draft section titles for 'DeepSeek V3 R1: Architecture and Innovations'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:47.647492",
"type": "event",
"data": {
"type": "logs",
"content": "draft_sections_generated",
"output": "\ud83d\uddc2\ufe0f Draft section titles generated for 'DeepSeek V3 R1: Architecture and Innovations'",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:47.668990",
"type": "event",
"data": {
"type": "logs",
"content": "fetching_relevant_written_content",
"output": "\ud83d\udd0e Getting relevant written content based on query: DeepSeek V3 R1: Architecture and Innovations...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:47.704690",
"type": "event",
"data": {
"type": "logs",
"content": "writing_report",
"output": "\u270d\ufe0f Writing report for 'DeepSeek V3 R1: Architecture and Innovations'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:57.587073",
"type": "event",
"data": {
"type": "logs",
"content": "report_written",
"output": "\ud83d\udcdd Report written for 'DeepSeek V3 R1: Architecture and Innovations'",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:57.635839",
"type": "event",
"data": {
"type": "logs",
"content": "starting_research",
"output": "\ud83d\udd0d Starting the research task for 'Performance Benchmarks and Impact on the AI Industry'...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:57.652395",
"type": "event",
"data": {
"type": "logs",
"content": "agent_generated",
"output": "\ud83e\udd16 AI Research Agent",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:33:57.662651",
"type": "event",
"data": {
"type": "logs",
"content": "planning_research",
"output": "\ud83c\udf10 Browsing the web to learn more about the task: Performance Benchmarks and Impact on the AI Industry...",
"metadata": null
}
},
{
"timestamp": "2025-01-29T21:34:05.668766",
"type": "event",
"data": {
"type": "logs",
"content": "planning_research",
"output": "\ud83e\udd14 Planning the research strategy and subtasks...",
"metadata": null
}
}
],
"content": {
"query": "",
"sources": [],
"context": [],
"report": "",
"costs": 0.0,
"type": "images",
"output": "[\"https://naixianzhang.com/wp-content/uploads/2025/01/image-57.png?w=1024\", \"https://naixianzhang.com/wp-content/uploads/2025/01/image-58.png?w=1024\", \"https://naixianzhang.com/wp-content/uploads/2025/01/image-59.png?w=1024\", \"https://naixianzhang.com/wp-content/uploads/2025/01/image-60.png?w=1024\", \"https://arbisoft.com/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fcontact.c5602fd6.png&w=1440&q=75\", \"https://us1.discourse-cdn.com/flex020/uploads/cursor1/optimized/3X/0/d/0df9e1f23791d1b8e362cb2bcf434e2bac1e7a09_2_419x500.png\", \"https://arbisoft.com/_next/image?url=https%3A%2F%2Fd1foa0aaimjyw4.cloudfront.net%2FBlog_Image_1_b56afb0c54.png&w=1920&q=75\"]",
"content": "selected_images",
"metadata": [
"https://naixianzhang.com/wp-content/uploads/2025/01/image-57.png?w=1024",
"https://naixianzhang.com/wp-content/uploads/2025/01/image-58.png?w=1024",
"https://naixianzhang.com/wp-content/uploads/2025/01/image-59.png?w=1024",
"https://naixianzhang.com/wp-content/uploads/2025/01/image-60.png?w=1024",
"https://arbisoft.com/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fcontact.c5602fd6.png&w=1440&q=75",
"https://us1.discourse-cdn.com/flex020/uploads/cursor1/optimized/3X/0/d/0df9e1f23791d1b8e362cb2bcf434e2bac1e7a09_2_419x500.png",
"https://arbisoft.com/_next/image?url=https%3A%2F%2Fd1foa0aaimjyw4.cloudfront.net%2FBlog_Image_1_b56afb0c54.png&w=1920&q=75"
]
}
}