lora
adamo1139 committed on
Commit
341215a
1 Parent(s): fedda47

Upload 3 files

Browse files
procedure/corpus_QA_long_single.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import os library
2
+ import os
3
+ import json
4
+ import random
5
+ # Import requests library
6
+ import requests
7
+
8
+
9
# Prompt templates. The script builds each request as
# ``preprompt + chunk + afterprompt``, so the book excerpt ends up between the
# "CONTEXT START" and "CONTEXT STOP" markers.
#
# Every "fill_in_yourself:" occurrence is a placeholder for the speaker labels
# and is meant to be edited before running.
#
# ``preprompt`` opens a system turn and a user turn using <|im_start|> /
# <|im_end|> tags and states the task before the excerpt.
preprompt = """
<|im_start|>system
You are an assistant that is great at interpreting text and creating questions based on the context<|im_end|>
<|im_start|>user
Below is an excerpt from a book. Based on this excerpt, please write 1 long detailed request/instruction and answer about the text. Make sure that answer follows the same style as the excerpt.
Request should start with "fill_in_yourself:" and every answer should start with "fill_in_yourself:" Request has to be very complex, detailed and in-depth. Do not mention that question comes from an excerpt in the question itself, ask about technical details.

CONTEXT START
"""

# ``afterprompt`` repeats the task after the excerpt and then pre-writes the
# beginning of the assistant's reply, ending on the first speaker label so the
# model continues from there. The main loop prepends "fill_in_yourself:" to
# the returned text again because the label is part of the prompt, not of the
# generated continuation.
#
# NOTE(review): the user turn opened in ``preprompt`` is never closed with
# <|im_end|>, and the reply is introduced with "ASSISTANT:" rather than an
# <|im_start|>assistant tag -- confirm this mix of formats is intended.
# NOTE(review): "instrustion" below looks like a typo for "instruction"; it is
# left untouched because it is part of the text sent to the model.
afterprompt = """
CONTEXT STOP

Above is an excerpt from a book. Based on this excerpt, please write 1 detailed in-depth request/instruction and answer about the text. Make sure that answer follow the same style as the excerpt.

ASSISTANT:
Sure, below is an advanced instrustion and response that can be inferred based on the content of the CONTEXT, the person asking the question is "fill_in_yourself:" and the person who responds is called "fill_in_yourself:".
I made sure that the instruction is in context to the CONTEXT.
I made sure that the style of the response matches the style of the excerpt.

fill_in_yourself:"""
30
+
31
+
32
def call_api(prompt, config):
    """Send *prompt* to the local generate endpoint and return the generated text.

    Args:
        prompt: Prompt text. ``bytes``/``bytearray`` input is decoded as UTF-8
            before being sent; any other object is converted with ``str()``.
        config: Path to a JSON file containing an object whose entries are
            merged into the request payload after the ``"prompt"`` key (so a
            ``"prompt"`` entry in the file would override the argument).

    Returns:
        The ``"text"`` value of the first item in the response's ``"results"``
        list, or ``""`` when the response is not valid JSON or carries no
        results.

    Raises:
        OSError: If the config file cannot be opened.
        json.JSONDecodeError: If the config file is not valid JSON.
        requests.exceptions.RequestException: If the HTTP request itself fails.
    """
    url = "http://127.0.0.1:5001/api/v1/generate"

    with open(config, "r", encoding="utf-8") as config_file:
        config_data = json.load(config_file)

    # Accept bytes as well as str: formatting a bytes object into a string
    # yields its "b'...'" repr (with escaped newlines), which would be sent
    # to the model verbatim instead of the intended prompt text.
    if isinstance(prompt, (bytes, bytearray)):
        prompt = bytes(prompt).decode("utf-8")

    data = {
        "prompt": str(prompt),
        **config_data,
    }
    response = requests.post(url, json=data)

    try:
        response_json = response.json()
    except ValueError:
        # requests raises a JSON decode error that derives from ValueError
        # whichever JSON backend it uses.
        print("API response could not be decoded as JSON.")
        return ""

    # An absent or empty "results" list means there is nothing to return;
    # indexing it directly would raise IndexError.
    results = response_json.get("results") or [{}]
    return results[0].get("text", "")
51
# Corpus generation loop.
#
# The book is read, cut into fixed-size character chunks, and each chunk is
# wrapped in the prompt templates and sent to the API. Every non-empty reply is
# appended to the corpus file. Passes over the book repeat until the corpus
# file grows past ``file_size_limit``.
file_name = "fill_in_yourself/book_cleaned.txt"
corpus_file_name = "fill_in_yourself/book_corpus1.txt"
chunk_size = 11000  # characters of book text per prompt
file_size_limit = 50 * 1024 * 1024  # 50 megabytes

limit_reached = False
while not limit_reached:
    if not os.path.exists(file_name):
        print("The file does not exist. Please check the file name and location.")
        # Nothing can change this within the loop, so stop instead of
        # printing the same message forever.
        break

    with open(file_name, encoding="utf8", errors="ignore") as f:
        text = f.read()

    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    chunkcount = str(len(chunks))
    written = 0

    # The context manager closes the corpus file at the end of every pass.
    with open(corpus_file_name, "a", encoding="utf-8") as corpus_file:
        for k, chunk in enumerate(chunks, start=1):
            print("\nProcessing chunk " + str(k) + " out of " + chunkcount + " chunks\n")

            # Pass the prompt as str: call_api interpolates it into the JSON
            # payload, and a bytes object would be rendered as its "b'...'"
            # repr rather than as the prompt text.
            response = call_api(preprompt + chunk + afterprompt, "config.json")

            if not response:
                print("Something went wrong. Please check the url and the chunk.")
                continue

            # The speaker label ends the prompt, so it is re-added in front of
            # the continuation before wrapping the sample in <s>...</s>.
            result = "<s>" + "fill_in_yourself:" + response + "</s>"
            print(result + "\n")
            corpus_file.write(result + "\n\n\n")
            corpus_file.flush()  # Ensure data is written immediately
            written += 1

            if os.path.getsize(corpus_file_name) > file_size_limit:
                # Leave both the chunk loop and the outer pass loop.
                limit_reached = True
                break

    if written == 0:
        # An empty book or an API that never answers would otherwise make the
        # outer loop spin without ever reaching the size limit.
        print("No responses were written during this pass; stopping.")
        break
procedure/corpus_QA_long_single_differential_complexity.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import os library
2
+ import os
3
+ import json
4
+ import random
5
+ # Import requests library
6
+ import requests
7
+
8
+
9
# Prompt templates. The script builds each request as
# ``preprompt + chunk + afterprompt``, so the book excerpt ends up between the
# "CONTEXT START" and "CONTEXT STOP" markers.
#
# Every "fill_in_yourself:" occurrence is a placeholder for the speaker labels
# and is meant to be edited before running.
#
# ``preprompt`` opens a system turn and a user turn using <|im_start|> /
# <|im_end|> tags and states the task before the excerpt.
#
# NOTE(review): the first instruction line asks for an "easy instruction"
# while the next one says the request "has to be very complex" -- confirm
# which wording is intended.
preprompt = """
<|im_start|>system
You are an assistant that is great at interpreting text and creating questions based on the context<|im_end|>
<|im_start|>user
Below is an excerpt from a book. Based on this excerpt, please write 1 long easy instruction and very detailed and exhaustive response. Make sure that answer follows the same style as the excerpt.
Request should start with "fill_in_yourself:" and every answer should start with "fill_in_yourself:" Request has to be very complex, detailed and in-depth. Do not mention that question comes from an excerpt or the book in the question itself, ask about technical details.

CONTEXT START
"""

# ``afterprompt`` repeats the task after the excerpt, closes the user turn,
# opens the assistant turn and pre-writes the beginning of the reply, ending
# on the first speaker label so the model continues from there. The main loop
# prepends "fill_in_yourself:" to the returned text again because the label is
# part of the prompt, not of the generated continuation.
#
# NOTE(review): "instrustion" below looks like a typo for "instruction"; it is
# left untouched because it is part of the text sent to the model.
afterprompt = """
CONTEXT STOP

Above is an excerpt from a book. Based on this excerpt, please write 1 long easy instruction and very detailed and exhaustive response. Make sure that answer follows the same style as the excerpt.<|im_end|>
<|im_start|>assistant
Sure, below is an advanced instrustion and response that can be inferred based on the content of the CONTEXT, the person asking the question is "fill_in_yourself:" and the person who responds is called "fill_in_yourself:".
I made sure that the instruction is in context to the CONTEXT.
I made sure that the style of the response matches the style of the excerpt.
I made sure that instruction is not complicated, yet the response is very exhaustive and detailed.

fill_in_yourself:"""
30
+
31
+
32
def call_api(prompt, config):
    """Send *prompt* to the local generate endpoint and return the generated text.

    Args:
        prompt: Prompt text. ``bytes``/``bytearray`` input is decoded as UTF-8
            before being sent; any other object is converted with ``str()``.
        config: Path to a JSON file containing an object whose entries are
            merged into the request payload after the ``"prompt"`` key (so a
            ``"prompt"`` entry in the file would override the argument).

    Returns:
        The ``"text"`` value of the first item in the response's ``"results"``
        list, or ``""`` when the response is not valid JSON or carries no
        results.

    Raises:
        OSError: If the config file cannot be opened.
        json.JSONDecodeError: If the config file is not valid JSON.
        requests.exceptions.RequestException: If the HTTP request itself fails.
    """
    url = "http://127.0.0.1:5001/api/v1/generate"

    with open(config, "r", encoding="utf-8") as config_file:
        config_data = json.load(config_file)

    # Accept bytes as well as str: formatting a bytes object into a string
    # yields its "b'...'" repr (with escaped newlines), which would be sent
    # to the model verbatim instead of the intended prompt text.
    if isinstance(prompt, (bytes, bytearray)):
        prompt = bytes(prompt).decode("utf-8")

    data = {
        "prompt": str(prompt),
        **config_data,
    }
    response = requests.post(url, json=data)

    try:
        response_json = response.json()
    except ValueError:
        # requests raises a JSON decode error that derives from ValueError
        # whichever JSON backend it uses.
        print("API response could not be decoded as JSON.")
        return ""

    # An absent or empty "results" list means there is nothing to return;
    # indexing it directly would raise IndexError.
    results = response_json.get("results") or [{}]
    return results[0].get("text", "")
51
# Corpus generation loop.
#
# The book is read, cut into fixed-size character chunks, and each chunk is
# wrapped in the prompt templates and sent to the API. Every non-empty reply is
# appended to the corpus file. Passes over the book repeat until the corpus
# file grows past ``file_size_limit``.
#
# NOTE(review): the input lives under "3d_printing_basics/" while the corpus is
# written under "fill_in_yourself/" -- confirm the output directory is intended.
file_name = "3d_printing_basics/book_cleaned.txt"
corpus_file_name = "fill_in_yourself/book_corpus1.txt"
chunk_size = 11000  # characters of book text per prompt
file_size_limit = 50 * 1024 * 1024  # 50 megabytes

limit_reached = False
while not limit_reached:
    if not os.path.exists(file_name):
        print("The file does not exist. Please check the file name and location.")
        # Nothing can change this within the loop, so stop instead of
        # printing the same message forever.
        break

    with open(file_name, encoding="utf8", errors="ignore") as f:
        text = f.read()

    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    chunkcount = str(len(chunks))
    written = 0

    # The context manager closes the corpus file at the end of every pass.
    with open(corpus_file_name, "a", encoding="utf-8") as corpus_file:
        for k, chunk in enumerate(chunks, start=1):
            print("\nProcessing chunk " + str(k) + " out of " + chunkcount + " chunks\n")

            # Pass the prompt as str: call_api interpolates it into the JSON
            # payload, and a bytes object would be rendered as its "b'...'"
            # repr rather than as the prompt text.
            response = call_api(preprompt + chunk + afterprompt, "config.json")

            if not response:
                print("Something went wrong. Please check the url and the chunk.")
                continue

            # The speaker label ends the prompt, so it is re-added in front of
            # the continuation before wrapping the sample in <s>...</s>.
            result = "<s>" + "fill_in_yourself:" + response + "</s>"
            print(result + "\n")
            corpus_file.write(result + "\n\n\n")
            corpus_file.flush()  # Ensure data is written immediately
            written += 1

            if os.path.getsize(corpus_file_name) > file_size_limit:
                # Leave both the chunk loop and the outer pass loop.
                limit_reached = True
                break

    if written == 0:
        # An empty book or an API that never answers would otherwise make the
        # outer loop spin without ever reaching the size limit.
        print("No responses were written during this pass; stopping.")
        break
procedure/corpus_QA_qa5x.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import os library
2
+ import os
3
+ import json
4
+ import random
5
+ # Import requests library
6
+ import requests
7
+
8
+
9
# Prompt templates. The script builds each request as
# ``preprompt + chunk + afterprompt``, so the book excerpt ends up between the
# "CONTEXT START" and "CONTEXT STOP" markers.
#
# Every "fill_in_yourself:" occurrence is a placeholder for the speaker labels
# and is meant to be edited before running.
#
# ``preprompt`` opens a system turn and a user turn using <|im_start|> /
# <|im_end|> tags and asks for five question/answer pairs about the excerpt.
preprompt = """
<|im_start|>system
You are an assistant that is great at interpreting text and creating questions based on the context<|im_end|>
<|im_start|>user
Below is an excerpt from a book. Based on this excerpt, please write 5 detailed in-depth Questions and answers about the text. Make sure that answers follow the same style as the excerpt.
Every question should start with "fill_in_yourself:" and every answer should start with "fill_in_yourself:" Questions have to be very complex, detailed and in-depth.

CONTEXT START
"""

# ``afterprompt`` repeats the task after the excerpt and then pre-writes the
# beginning of the assistant's reply, ending on "(Question 1 of 5)" and the
# first speaker label followed by a newline, so the model continues from
# there. The main loop prepends "fill_in_yourself:" to the returned text again
# because the label is part of the prompt, not of the generated continuation.
#
# NOTE(review): the user turn opened in ``preprompt`` is never closed with
# <|im_end|>, and the reply is introduced with "ASSISTANT:" rather than an
# <|im_start|>assistant tag -- confirm this mix of formats is intended.
afterprompt = """
CONTEXT STOP

Above is an excerpt from a book. Based on this excerpt, please write 5 detailed in-depth Questions and answers about the text. Make sure that answers follow the same style as the excerpt.

ASSISTANT:
Sure, below are 5 detailed and complex questions and answers that can be inferred based on the content of the CONTEXT, the person asking the question is "fill_in_yourself:" and the person who responds is called "fill_in_yourself:".
I made sure that the questions are created are in context to the CONTEXT.
I also made sure to create multiple questions, I won't stop at one!
I made sure that the style of the response matches the style of the excerpt.

(Question 1 of 5)
fill_in_yourself:
"""
33
+
34
+
35
def call_api(prompt, config):
    """Send *prompt* to the local generate endpoint and return the generated text.

    Args:
        prompt: Prompt text. ``bytes``/``bytearray`` input is decoded as UTF-8
            before being sent; any other object is converted with ``str()``.
        config: Path to a JSON file containing an object whose entries are
            merged into the request payload after the ``"prompt"`` key (so a
            ``"prompt"`` entry in the file would override the argument).

    Returns:
        The ``"text"`` value of the first item in the response's ``"results"``
        list, or ``""`` when the response is not valid JSON or carries no
        results.

    Raises:
        OSError: If the config file cannot be opened.
        json.JSONDecodeError: If the config file is not valid JSON.
        requests.exceptions.RequestException: If the HTTP request itself fails.
    """
    url = "http://127.0.0.1:5001/api/v1/generate"

    with open(config, "r", encoding="utf-8") as config_file:
        config_data = json.load(config_file)

    # Accept bytes as well as str: formatting a bytes object into a string
    # yields its "b'...'" repr (with escaped newlines), which would be sent
    # to the model verbatim instead of the intended prompt text.
    if isinstance(prompt, (bytes, bytearray)):
        prompt = bytes(prompt).decode("utf-8")

    data = {
        "prompt": str(prompt),
        **config_data,
    }
    response = requests.post(url, json=data)

    try:
        response_json = response.json()
    except ValueError:
        # requests raises a JSON decode error that derives from ValueError
        # whichever JSON backend it uses.
        print("API response could not be decoded as JSON.")
        return ""

    # An absent or empty "results" list means there is nothing to return;
    # indexing it directly would raise IndexError.
    results = response_json.get("results") or [{}]
    return results[0].get("text", "")
54
# Corpus generation loop.
#
# The book is read, cut into fixed-size character chunks, and each chunk is
# wrapped in the prompt templates and sent to the API. Every non-empty reply is
# appended to the corpus file. Passes over the book repeat until the corpus
# file grows past ``file_size_limit``.
file_name = "fill_in_yourself/book_cleaned.txt"
corpus_file_name = "fill_in_yourself/book_corpus1.txt"
chunk_size = 12000  # characters of book text per prompt
file_size_limit = 50 * 1024 * 1024  # 50 megabytes

limit_reached = False
while not limit_reached:
    if not os.path.exists(file_name):
        print("The file does not exist. Please check the file name and location.")
        # Nothing can change this within the loop, so stop instead of
        # printing the same message forever.
        break

    with open(file_name, encoding="utf8", errors="ignore") as f:
        text = f.read()

    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    chunkcount = str(len(chunks))
    written = 0

    # The context manager closes the corpus file at the end of every pass.
    with open(corpus_file_name, "a", encoding="utf-8") as corpus_file:
        for k, chunk in enumerate(chunks, start=1):
            print("\nProcessing chunk " + str(k) + " out of " + chunkcount + " chunks\n")

            # Pass the prompt as str: call_api interpolates it into the JSON
            # payload, and a bytes object would be rendered as its "b'...'"
            # repr rather than as the prompt text.
            response = call_api(preprompt + chunk + afterprompt, "config.json")

            if not response:
                print("Something went wrong. Please check the url and the chunk.")
                continue

            # The speaker label ends the prompt, so it is re-added in front of
            # the continuation before wrapping the sample in <s>...</s>.
            result = "<s>" + "fill_in_yourself:" + response + "</s>"
            print(result + "\n")
            corpus_file.write(result + "\n\n\n")
            corpus_file.flush()  # Ensure data is written immediately
            written += 1

            if os.path.getsize(corpus_file_name) > file_size_limit:
                # Leave both the chunk loop and the outer pass loop.
                limit_reached = True
                break

    if written == 0:
        # An empty book or an API that never answers would otherwise make the
        # outer loop spin without ever reaching the size limit.
        print("No responses were written during this pass; stopping.")
        break