dx2102 committed on
Commit
ec0626f
1 Parent(s): 883c5b4

Update extract.lean

Browse files
Files changed (1) hide show
  1. extract.lean +21 -14
extract.lean CHANGED
@@ -10,26 +10,30 @@ open Lean.Elab
10
  open Lean.Elab.Command
11
 
12
  /-
13
- This file is modified from: https://github.com/semorrison/lean-training-data
 
 
 
 
 
14
 
15
- To run this, prepare a Lean project with Mathlib installed. Suppose the project is called LeanProj.
16
-
17
- Put this extract.lean in LeanProj/scripts/extract.lean
18
 
19
  In LeanProj/lakefile.lean, below
20
  @[default_target]
21
  lean_lib «LeanProj» where
22
- ```,
23
 
24
  add this:
25
  @[default_target]
26
  lean_exe extract where
27
  root := `scripts.extract
28
  supportInterpreter := true
29
- ```
 
 
30
 
31
  Now in the terminal, run:
32
- cd LeanProj
33
  mkdir data
34
  lake exe extract > data/theorems.txt
35
 
@@ -63,7 +67,10 @@ def main : IO Unit := do
63
  IO.eprintln "\n\n"
64
  IO.eprintln "importing modules"
65
  searchPathRef.set compile_time_search_path%
66
- CoreM.withImportModules #[`Mathlib] do
 
 
 
67
  IO.eprintln s!"simplification timeout: {← getMaxHeartbeats}"
68
  let env ← getEnv
69
 
@@ -83,38 +90,38 @@ def main : IO Unit := do
83
  -- name.toString.startsWith "Nat.add"
84
  IO.eprintln s!"printing {lst.size} theorems"
85
  for (name, info) in lst do
86
- IO.println "----ast----"
87
  -- print module.
88
  -- eg. "import Mathlib.Data.Real.Basic"
89
  match (← findModuleOf? name) with
90
  | .none => IO.println "unknown"
91
  | .some mod => IO.println s!"import {mod}"
92
- IO.println "--ast--"
93
 
94
  -- print kind and decl name
95
  -- eg. "theorem Nat.add_comm" or "def Nat.add"
96
  IO.println s!"{info.kind} {name}"
97
- IO.println "--ast--"
98
 
99
  -- print decl type.
100
  -- eg. "∀ (n m : Nat), n + m = m + n"
101
  -- ppExpr is better than dbgToString
102
  let ppType := toString (← MetaM.run' do ppExpr info.type)
103
  truncatePrint ppType
104
- IO.println "--ast--"
105
 
106
  -- print doc string if exists.
107
  -- eg. O(|l| + |r|). Merge two lists using s as a switch.
108
  match (← findDocString? env name) with
109
  | .none => IO.println ""
110
  | .some doc => IO.println doc
111
- -- IO.println "--ast--"
112
 
113
  -- print decl value. These Lean.Expr are very large.
114
  -- match info.value? with
115
  -- | .none => IO.println ""
116
  -- | .some val => IO.println (toString (← MetaM.run' do ppExpr val))
117
- -- IO.println "--ast--"
118
 
119
  x := x + 1
120
  if x % 100 == 0 then
 
10
  open Lean.Elab.Command
11
 
12
  /-
13
+ This small file is modified from: https://github.com/semorrison/lean-training-data
14
+ You can refer to the file structure there for more details.
15
+ To run this, there is no need to download another 4GB of Mathlib files.
16
+ Just prepare a Lean project with Mathlib installed. Suppose the project is in the folder LeanProj.
17
+ Your Mathlib files should live in LeanProj/.lake/packages/mathlib.
18
+ My LeanProj/lean-toolchain says I am using version: leanprover/lean4:v4.9.0-rc2
19
 
20
+ Now, put this extract.lean in LeanProj/scripts/extract.lean
 
 
21
 
22
  In LeanProj/lakefile.lean, below
23
  @[default_target]
24
  lean_lib «LeanProj» where
 
25
 
26
  add this:
27
  @[default_target]
28
  lean_exe extract where
29
  root := `scripts.extract
30
  supportInterpreter := true
31
+
32
+ This will make the `lake exe extract` command available,
33
+ and enable the correct `compile_time_search_path%` in the script.
34
 
35
  Now in the terminal, run:
36
+ cd path-to-LeanProj
37
  mkdir data
38
  lake exe extract > data/theorems.txt
39
 
 
67
  IO.eprintln "\n\n"
68
  IO.eprintln "importing modules"
69
  searchPathRef.set compile_time_search_path%
70
+ CoreM.withImportModules #[`Mathlib] (options := {entries := [
71
+ -- (`maxHeartbeats, .ofNat 0),
72
+ (`smartUnfolding, .ofBool false),
73
+ ]}) do
74
  IO.eprintln s!"simplification timeout: {← getMaxHeartbeats}"
75
  let env ← getEnv
76
 
 
90
  -- name.toString.startsWith "Nat.add"
91
  IO.eprintln s!"printing {lst.size} theorems"
92
  for (name, info) in lst do
93
+ IO.println "----thm----"
94
  -- print module.
95
  -- eg. "import Mathlib.Data.Real.Basic"
96
  match (← findModuleOf? name) with
97
  | .none => IO.println "unknown"
98
  | .some mod => IO.println s!"import {mod}"
99
+ IO.println "--thm--"
100
 
101
  -- print kind and decl name
102
  -- eg. "theorem Nat.add_comm" or "def Nat.add"
103
  IO.println s!"{info.kind} {name}"
104
+ IO.println "--thm--"
105
 
106
  -- print decl type.
107
  -- eg. "∀ (n m : Nat), n + m = m + n"
108
  -- ppExpr is better than dbgToString
109
  let ppType := toString (← MetaM.run' do ppExpr info.type)
110
  truncatePrint ppType
111
+ IO.println "--thm--"
112
 
113
  -- print doc string if exists.
114
  -- eg. O(|l| + |r|). Merge two lists using s as a switch.
115
  match (← findDocString? env name) with
116
  | .none => IO.println ""
117
  | .some doc => IO.println doc
118
+ -- IO.println "--thm--"
119
 
120
  -- print decl value. These Lean.Expr are very large.
121
  -- match info.value? with
122
  -- | .none => IO.println ""
123
  -- | .some val => IO.println (toString (← MetaM.run' do ppExpr val))
124
+ -- IO.println "--thm--"
125
 
126
  x := x + 1
127
  if x % 100 == 0 then