Spaces:
Running
Running
Update extract.lean
Browse files- extract.lean +21 -14
extract.lean
CHANGED
@@ -10,26 +10,30 @@ open Lean.Elab
|
|
10 |
open Lean.Elab.Command
|
11 |
|
12 |
/-
|
13 |
-
This file is modified from: https://github.com/semorrison/lean-training-data
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
Put this extract.lean in LeanProj/scripts/extract.lean
|
18 |
|
19 |
In LeanProj/lakefile.lean, below
|
20 |
@[default_target]
|
21 |
lean_lib «LeanProj» where
|
22 |
-
```,
|
23 |
|
24 |
add this:
|
25 |
@[default_target]
|
26 |
lean_exe extract where
|
27 |
root := `scripts.extract
|
28 |
supportInterpreter := true
|
29 |
-
|
|
|
|
|
30 |
|
31 |
Now in the terminal, run:
|
32 |
-
cd LeanProj
|
33 |
mkdir data
|
34 |
lake exe extract > data/theorems.txt
|
35 |
|
@@ -63,7 +67,10 @@ def main : IO Unit := do
|
|
63 |
IO.eprintln "\n\n"
|
64 |
IO.eprintln "importing modules"
|
65 |
searchPathRef.set compile_time_search_path%
|
66 |
-
CoreM.withImportModules #[`Mathlib]
|
|
|
|
|
|
|
67 |
IO.eprintln s!"simplification timeout: {← getMaxHeartbeats}"
|
68 |
let env ← getEnv
|
69 |
|
@@ -83,38 +90,38 @@ def main : IO Unit := do
|
|
83 |
-- name.toString.startsWith "Nat.add"
|
84 |
IO.eprintln s!"printing {lst.size} theorems"
|
85 |
for (name, info) in lst do
|
86 |
-
IO.println "----
|
87 |
-- print module.
|
88 |
-- eg. "import Mathlib.Data.Real.Basic"
|
89 |
match (← findModuleOf? name) with
|
90 |
| .none => IO.println "unknown"
|
91 |
| .some mod => IO.println s!"import {mod}"
|
92 |
-
IO.println "--
|
93 |
|
94 |
-- print kine and decl name
|
95 |
-- eg. "theorem Nat.add_comm" or "def Nat.add"
|
96 |
IO.println s!"{info.kind} {name}"
|
97 |
-
IO.println "--
|
98 |
|
99 |
-- print decl type.
|
100 |
-- eg. "∀ (n m : Nat), n + m = m + n"
|
101 |
-- ppExpr is better than dbgToString
|
102 |
let ppType := toString (← MetaM.run' do ppExpr info.type)
|
103 |
truncatePrint ppType
|
104 |
-
IO.println "--
|
105 |
|
106 |
-- print doc string if exists.
|
107 |
-- eg. O(|l| + |r|). Merge two lists using s as a switch.
|
108 |
match (← findDocString? env name) with
|
109 |
| .none => IO.println ""
|
110 |
| .some doc => IO.println doc
|
111 |
-
-- IO.println "--
|
112 |
|
113 |
-- print decl value. These Lean.Expr are very large.
|
114 |
-- match info.value? with
|
115 |
-- | .none => IO.println ""
|
116 |
-- | .some val => IO.println (toString (← MetaM.run' do ppExpr val))
|
117 |
-
-- IO.println "--
|
118 |
|
119 |
x := x + 1
|
120 |
if x % 100 == 0 then
|
|
|
10 |
open Lean.Elab.Command
|
11 |
|
12 |
/-
|
13 |
+
This small file is modified from: https://github.com/semorrison/lean-training-data
|
14 |
+
You can refer to the file structure there for more details.
|
15 |
+
To run this, there is no need to download another 4GB of Mathlib files.
|
16 |
+
Just prepare a Lean project with Mathlib installed. Suppose the project is in the folder LeanProj.
|
17 |
+
Your Mathlib files should live in LeanProj/.lake/packages/mathlib.
|
18 |
+
My LeanProj/lean-toolchain says I am using version: leanprover/lean4:v4.9.0-rc2
|
19 |
|
20 |
+
Now, put this extract.lean in LeanProj/scripts/extract.lean
|
|
|
|
|
21 |
|
22 |
In LeanProj/lakefile.lean, below
|
23 |
@[default_target]
|
24 |
lean_lib «LeanProj» where
|
|
|
25 |
|
26 |
add this:
|
27 |
@[default_target]
|
28 |
lean_exe extract where
|
29 |
root := `scripts.extract
|
30 |
supportInterpreter := true
|
31 |
+
|
32 |
+
This will make the `lake exe extract` command available,
|
33 |
+
and enable the correct `compile_time_search_path%` in the script.
|
34 |
|
35 |
Now in the terminal, run:
|
36 |
+
cd path-to-LeanProj
|
37 |
mkdir data
|
38 |
lake exe extract > data/theorems.txt
|
39 |
|
|
|
67 |
IO.eprintln "\n\n"
|
68 |
IO.eprintln "importing modules"
|
69 |
searchPathRef.set compile_time_search_path%
|
70 |
+
CoreM.withImportModules #[`Mathlib] (options := {entries := [
|
71 |
+
-- (`maxHeartbeats, .ofNat 0),
|
72 |
+
(`smartUnfolding, .ofBool false),
|
73 |
+
]}) do
|
74 |
IO.eprintln s!"simplification timeout: {← getMaxHeartbeats}"
|
75 |
let env ← getEnv
|
76 |
|
|
|
90 |
-- name.toString.startsWith "Nat.add"
|
91 |
IO.eprintln s!"printing {lst.size} theorems"
|
92 |
for (name, info) in lst do
|
93 |
+
IO.println "----thm----"
|
94 |
-- print module.
|
95 |
-- eg. "import Mathlib.Data.Real.Basic"
|
96 |
match (← findModuleOf? name) with
|
97 |
| .none => IO.println "unknown"
|
98 |
| .some mod => IO.println s!"import {mod}"
|
99 |
+
IO.println "--thm--"
|
100 |
|
101 |
-- print kine and decl name
|
102 |
-- eg. "theorem Nat.add_comm" or "def Nat.add"
|
103 |
IO.println s!"{info.kind} {name}"
|
104 |
+
IO.println "--thm--"
|
105 |
|
106 |
-- print decl type.
|
107 |
-- eg. "∀ (n m : Nat), n + m = m + n"
|
108 |
-- ppExpr is better than dbgToString
|
109 |
let ppType := toString (← MetaM.run' do ppExpr info.type)
|
110 |
truncatePrint ppType
|
111 |
+
IO.println "--thm--"
|
112 |
|
113 |
-- print doc string if exists.
|
114 |
-- eg. O(|l| + |r|). Merge two lists using s as a switch.
|
115 |
match (← findDocString? env name) with
|
116 |
| .none => IO.println ""
|
117 |
| .some doc => IO.println doc
|
118 |
+
-- IO.println "--thm--"
|
119 |
|
120 |
-- print decl value. These Lean.Expr are very large.
|
121 |
-- match info.value? with
|
122 |
-- | .none => IO.println ""
|
123 |
-- | .some val => IO.println (toString (← MetaM.run' do ppExpr val))
|
124 |
+
-- IO.println "--thm--"
|
125 |
|
126 |
x := x + 1
|
127 |
if x % 100 == 0 then
|