Spaces:
Sleeping
Sleeping
Dmitry Kirsanov committed on
Commit ·
9a5a6dc
1
Parent(s): d1d70b6
more fact-finding
Browse files- cl-cleanup.py +6 -0
- understand.py +46 -13
cl-cleanup.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import parse, sys
|
| 2 |
+
|
| 3 |
+
# Parse the file named by the first command-line argument and write the
# stringified result next to it, using a "-cleaned.html" suffix.
source_path = sys.argv[1]
soup = parse.parse(source_path)
cleaned_path = source_path + "-cleaned.html"
with open(cleaned_path, "w", encoding="utf-8") as out:
    out.write(str(soup))
|
understand.py
CHANGED
|
@@ -10,29 +10,62 @@ class Document():
|
|
| 10 |
self.doctype = None
|
| 11 |
self.summary = None
|
| 12 |
|
| 13 |
-
def
|
| 14 |
try:
|
| 15 |
-
answer, truncated = self.LLM.send_question(
|
| 16 |
-
|
| 17 |
-
<document>{self.fad}</document>""")
|
| 18 |
-
|
| 19 |
return answer.strip()
|
| 20 |
-
|
| 21 |
except Exception as e:
|
| 22 |
print (e.message, e.args)
|
| 23 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
def query_summary(self):
|
| 26 |
-
|
| 27 |
-
|
| 28 |
|
| 29 |
-
|
| 30 |
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
except Exception as e:
|
| 34 |
-
print (e.message, e.args)
|
| 35 |
-
return None
|
| 36 |
|
| 37 |
def understand(self, filename):
|
| 38 |
self.documentSoup = parse.parse(filename)
|
|
|
|
| 10 |
self.doctype = None
|
| 11 |
self.summary = None
|
| 12 |
|
| 13 |
+
def query(self, s):
    """Send the prompt *s* to ``self.LLM`` and return the stripped answer.

    ``self.LLM.send_question(s)`` is expected to return an
    ``(answer, truncated)`` pair; only the answer is used.

    Returns:
        The answer with surrounding whitespace removed, or ``None`` if the
        call (or the post-processing of its result) raised an exception.
    """
    try:
        answer, _truncated = self.LLM.send_question(s)
        return answer.strip()
    except Exception as e:
        # Python 3 exceptions have no ``.message`` attribute; reading it here
        # would raise AttributeError from inside the handler and defeat the
        # "return None on failure" contract. Print the exception itself.
        print(e, e.args)
        return None
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def query_doctype(self):
    """Ask for a one-line description of what kind of document this is.

    Builds a prompt that embeds ``self.fad`` inside ``<document>`` tags and
    forwards it to ``self.query``; the result of ``self.query`` is returned
    unchanged.
    """
    prompt = f"""Please read this document carefully. Remember everything you know about the concepts it touches.

Give a brief description of what this document is. Examples: "Textbook about interpersonal psychology", "State standard on oil rig equipment", "User manual for a smartphone app".

Write just one line with the description.

<document>{self.fad}</document>"""
    return self.query(prompt)
|
| 31 |
|
| 32 |
def query_summary(self):
    """Ask for a one-paragraph summary of the document.

    The prompt mentions ``self.doctype`` as the kind of document and embeds
    ``self.fad`` inside ``<document>`` tags. It is forwarded to
    ``self.query`` and that call's result is returned unchanged.
    """
    prompt = f"""Below is a document inside <document></document>. It is a {self.doctype}.

Read it carefully. Remember everything you know about the concepts it touches.

Give a one-paragraph summary of the document's content. Write just one paragraph with the summary.

<document>{self.fad}</document>"""
    return self.query(prompt)
|
| 41 |
+
|
| 42 |
+
def query_concepts(self):
    """Ask for the document's main concepts and its implied actors.

    The prompt mentions ``self.doctype`` as the kind of document, embeds
    ``self.fad`` inside ``<document>`` tags, and requests two sections in
    the reply: a ``CONCEPTS:`` list and an ``ACTORS:`` list, one item per
    line. It is forwarded to ``self.query`` and that call's result is
    returned unchanged.
    """
    # The ACTORS instruction previously asked for "a list of concepts"
    # (copied from the CONCEPTS instruction); it now asks for actors.
    prompt = f"""Below is a document inside <document></document>. It is a {self.doctype}.

Read it carefully. Remember everything you know about the concepts it touches.

Now, read it again, and compile a list of the main concepts that this document talks about. Output the line "CONCEPTS:" followed by a list of concepts, one per line.

Then, read it one more time, and think about who are the actors implied. Who are the parties that this document is authored by, is governed by, is addressed to? Often, these parties are not explicitly stated in the document, so you will need to use your common sense. See if you can compile a minimal list of the main actors that the document is concerned with. Output the line "ACTORS:" followed by a list of actors, one per line.

For example, for an instruction manual for a chainsaw, you might want to output:
CONCEPTS:
Power tool
Safety
Work area
Saw chain
Wood
...
ACTORS:
Chainsaw manufacturer
Chainsaw user
Children and bystanders
...

<document>{self.fad}</document>"""
    return self.query(prompt)
|
| 67 |
+
|
| 68 |
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
def understand(self, filename):
|
| 71 |
self.documentSoup = parse.parse(filename)
|