Dmitry Kirsanov committed on
Commit
9a5a6dc
·
1 Parent(s): d1d70b6

more fact-finding

Browse files
Files changed (2) hide show
  1. cl-cleanup.py +6 -0
  2. understand.py +46 -13
cl-cleanup.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
"""Parse the file named on the command line and save the result as HTML.

The output is written next to the input, as ``<input>-cleaned.html``.
"""
import sys

import parse


def main(argv):
    """Run the clean-up for ``argv[1]`` and return a process exit status.

    Returns 2 (after printing a usage line to stderr) when no input file
    was given, 0 once the output file has been written.
    """
    # Without this guard a missing argument surfaced as a bare IndexError.
    if len(argv) < 2:
        script = argv[0] if argv else "cl-cleanup.py"
        print(f"usage: {script} FILE", file=sys.stderr)
        return 2

    source_path = argv[1]
    soup = parse.parse(source_path)

    with open(source_path + "-cleaned.html", "w", encoding="utf-8") as out:
        out.write(str(soup))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
understand.py CHANGED
@@ -10,29 +10,62 @@ class Document():
10
  self.doctype = None
11
  self.summary = None
12
 
13
- def query_doctype(self):
14
  try:
15
- answer, truncated = self.LLM.send_question(f"""Please read this document carefully, remember all you know about the concepts it touches, and give a brief description of what this document is. Examples: "Textbook about interpersonal psychology", "State standard on oil rig equipment", "User manual for a smartphone app". Write just one line with the description.
16
-
17
- <document>{self.fad}</document>""")
18
-
19
  return answer.strip()
20
-
21
  except Exception as e:
22
  print (e.message, e.args)
23
  return None
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def query_summary(self):
26
- try:
27
- answer, truncated = self.LLM.send_question(f"""Below is a {self.doctype}. Please read it carefully, remember all you know about the concepts it touches, and give a one-paragraph summary of its content. Write just one paragraph with the summary.
28
 
29
- <document>{self.fad}</document>""")
30
 
31
- return answer.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- except Exception as e:
34
- print (e.message, e.args)
35
- return None
36
 
37
  def understand(self, filename):
38
  self.documentSoup = parse.parse(filename)
 
10
  self.doctype = None
11
  self.summary = None
12
 
13
def query(self, s):
    """Send prompt *s* to the LLM and return the answer, stripped.

    Returns None when sending the question or stripping the answer raises;
    the error is printed instead of propagated, so callers see a failed
    query as "no answer".
    """
    try:
        # send_question yields an (answer, truncated) pair; the truncation
        # flag is not used here.
        answer, _truncated = self.LLM.send_question(s)
        return answer.strip()
    except Exception as e:
        # Python 3 exceptions have no `.message` attribute: reading it
        # unconditionally raised AttributeError from inside this handler,
        # so the intended `return None` was never reached.
        print(getattr(e, "message", str(e)), e.args)
        return None
20
+
21
+
22
def query_doctype(self):
    """Ask the LLM for a one-line description of what kind of document this is.

    The prompt embeds ``self.fad`` inside ``<document>`` tags; the result is
    whatever ``self.query`` returns for that prompt.
    """
    prompt = f"""Please read this document carefully. Remember everything you know about the concepts it touches.

Give a brief description of what this document is. Examples: "Textbook about interpersonal psychology", "State standard on oil rig equipment", "User manual for a smartphone app".

Write just one line with the description.

<document>{self.fad}</document>"""
    return self.query(prompt)
31
 
32
def query_summary(self):
    """Ask the LLM for a one-paragraph summary of the document.

    The prompt embeds ``self.fad`` inside ``<document>`` tags and, when a
    document type is known, tells the model what kind of document it is.
    Returns whatever ``self.query`` returns for that prompt.
    """
    # self.doctype may still be None (its initial value, and also what
    # query() returns on failure). Leave the sentence out in that case
    # rather than telling the model "It is a None."
    doctype_note = f" It is a {self.doctype}." if self.doctype else ""
    return self.query(
f"""Below is a document inside <document></document>.{doctype_note}

Read it carefully. Remember everything you know about the concepts it touches.

Give a one-paragraph summary of the document's content. Write just one paragraph with the summary.

<document>{self.fad}</document>""")
41
+
42
def query_concepts(self):
    """Ask the LLM to list the document's main concepts and implied actors.

    The prompt asks for a "CONCEPTS:" section and an "ACTORS:" section, one
    item per line, and embeds ``self.fad`` inside ``<document>`` tags.
    Returns whatever ``self.query`` returns for that prompt (the raw answer
    text; it is not parsed here).
    """
    # self.doctype may still be None (its initial value, and also what
    # query() returns on failure). Leave the sentence out in that case
    # rather than telling the model "It is a None."
    doctype_note = f" It is a {self.doctype}." if self.doctype else ""
    # The ACTORS instruction previously said "a list of concepts", a
    # copy-paste slip from the CONCEPTS instruction above it.
    return self.query(
f"""Below is a document inside <document></document>.{doctype_note}

Read it carefully. Remember everything you know about the concepts it touches.

Now, read it again, and compile a list of the main concepts that this document talks about. Output the line "CONCEPTS:" followed by a list of concepts, one per line.

Then, read it one more time, and think about who are the actors implied. Who are the parties that this document is authored by, is governed by, is addressed to? Often, these parties are not explicitly stated in the document, so you will need to use your common sense. See if you can compile a minimal list of the main actors that the document is concerned with. Output the line "ACTORS:" followed by a list of actors, one per line.

For example, for an instruction manual for a chainsaw, you might want to output:
CONCEPTS:
Power tool
Safety
Work area
Saw chain
Wood
...
ACTORS:
Chainsaw manufacturer
Chainsaw user
Children and bystanders
...

<document>{self.fad}</document>""")
67
+
68
 
 
 
 
69
 
70
  def understand(self, filename):
71
  self.documentSoup = parse.parse(filename)