File size: 4,657 Bytes
f91510c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import openai\n",
    "import os\n",
    "from langchain import OpenAI\n",
    "\n",
    "from langchain.chains import LLMChain, MapReduceChain\n",
    "from langchain import PromptTemplate\n",
    "from langchain import *\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = \"1706.03762.pdf\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValidationError",
     "evalue": "1 validation error for OpenAI\n__root__\n  Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass  `openai_api_key` as a named parameter. (type=value_error)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValidationError\u001b[0m                           Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m llm \u001b[39m=\u001b[39m OpenAI()\n",
      "File \u001b[0;32m~/mambaforge/envs/langchain/lib/python3.9/site-packages/pydantic/main.py:342\u001b[0m, in \u001b[0;36mpydantic.main.BaseModel.__init__\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mValidationError\u001b[0m: 1 validation error for OpenAI\n__root__\n  Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass  `openai_api_key` as a named parameter. (type=value_error)"
     ]
    }
   ],
   "source": [
    "llm = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt_template = \"\"\"\n",
    "\n",
    "Given the LaTeX source of the paper titled \"{title}” from Arxiv and some existing code, please generate a Jupyter notebook that implements and replicates the results from the paper EXACTLY WITH NO ERRORS. Start from the provided code and modify it as necessary to incorporate the new ideas from the paper. The notebook should be organized in a logical and coherent way, with sections and sub-sections that reflect the paper's organization. Assume that the notebook will be used by someone who is familiar with the general field of AI, but may not be familiar with the specific topic of the paper. Please include code, explanations, and visualizations where appropriate. The notebook should be written in Python using the PyTorch machine learning framework and should be easy to run on a Jupyter server.\n",
    "\n",
    "######################\n",
    "LaTeX Source for paper\n",
    "######################\n",
    "    {paper}\n",
    "    \n",
    "######################\n",
    "Previous Code\n",
    "######################\n",
    "\n",
    "    {code}\n",
    "\n",
    "######################\n",
    "Your Code Here\n",
    "######################\n",
    "\"\"\"\n",
    "\n",
    "PROMPT = PromptTemplate(\n",
    "    template=prompt_template, input_variables=[\"title\", \"paper\", \"code\"]\n",
    ")\n",
    "\n",
    "# chain = LLMChain(llm=llm, prompt=PROMPT)\n",
    "MapReduceChain()\n",
    "chain = MapReduceChain.from_params(llm=llm, prompt=PROMPT, text_splitter=RecursiveCharacterTextSplitter())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"main.tex\", \"r\") as f:\n",
    "    main_tex = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chain.apply([{\"title\": \"Long Range Language Modeling via Gated State Spaces\", \"paper\": main_tex, \"code\": \"import torch\"}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "langchain",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "38273b2daeac471f0eac904bde99a8af597df3ec437acfdd6914b298b9a2825e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}