lora
adamo1139 committed on
Commit
3464e57
1 Parent(s): 341215a

Delete procedure/corpus_QA_book.py

Browse files
Files changed (1) hide show
  1. procedure/corpus_QA_book.py +0 -108
procedure/corpus_QA_book.py DELETED
@@ -1,108 +0,0 @@
1
- # Import os library
2
- import os
3
- import json
4
- import random
5
- # Import requests library
6
- import requests
7
-
8
# Path of the cleaned book text that is read and split into excerpts.
file_name = "raw-book-cleaned.txt"

# Text placed in front of every book excerpt: opens the chat, states the
# task for the model and marks where the excerpt begins.
preprompt = (
    "\n"
    "A chat.\n"
    "USER: Below is an excerpt from a book. Based on this excerpt, please write 5 Questions and answers about the text. Make sure that answers follow the same style as the excerpt.\n"
    'Every question should start with "Reader:" and every answer should start with "Thomas:"\n'
    "\n"
    "BOOK EXCERPT START\n"
)

# Text placed after every book excerpt: closes the excerpt and pre-fills
# the start of the assistant's reply, ending on the first "Reader:" label.
afterprompt = (
    "\n"
    "BOOK EXCERPT STOP\n"
    "\n"
    "ASSISTANT:\n"
    'Sure, below are 5 questions and answers that can be inferred based on the content of the BOOK EXCERPT, the person asking the question is "Reader:" and the person who responds is called "Thomas:".\n'
    "I made sure that the questions are created are in context to the BOOK EXCERPT.\n"
    "\n"
    "Reader:\n"
)
28
-
29
-
30
def call_api(prompt, config):
    """Send *prompt* to the local text-generation API and return its reply.

    Args:
        prompt: The prompt text. ``bytes`` are accepted and decoded as
            UTF-8 so the server never receives a ``b'...'`` repr.
        config: Path to a JSON file whose top-level keys are merged into
            the request payload next to ``"prompt"``.

    Returns:
        The ``text`` field of the first entry in the response's
        ``results`` list, or ``""`` when the request fails, the response
        is not valid JSON, or it contains no results.

    Raises:
        OSError: If the config file cannot be opened.
        json.JSONDecodeError: If the config file is not valid JSON.
    """
    url = "http://127.0.0.1:5001/api/v1/generate"

    with open(config, "r", encoding="utf-8") as config_file:
        config_data = json.load(config_file)

    # Formatting bytes through an f-string would produce the literal
    # "b'...'" representation (with escaped newlines) instead of the text.
    if isinstance(prompt, (bytes, bytearray)):
        prompt = prompt.decode("utf-8")

    data = {
        "prompt": f"{prompt}",
        **config_data,
    }

    try:
        response = requests.post(url, json=data)
    except requests.RequestException as exc:
        # Report the failure the same way as other errors: a falsy return
        # value that the caller already checks for.
        print(f"API request failed: {exc}")
        return ""

    try:
        response_json = response.json()
    except ValueError:
        # ValueError covers json.JSONDecodeError as well as the simplejson
        # variant that requests raises when simplejson is installed.
        print("API response could not be decoded as JSON.")
        return ""

    results = response_json.get("results") if isinstance(response_json, dict) else None
    if not isinstance(results, list) or not results:
        return ""
    first = results[0]
    if not isinstance(first, dict):
        return ""
    return first.get("text", "")
49
-
50
-
51
# Number of characters of book text sent to the model in one request.
CHUNK_SIZE = 10000
# Generation stops once the corpus file grows past this size (50 megabytes).
FILE_SIZE_LIMIT = 50 * 1024 * 1024
# File that the generated question/answer text is appended to.
CORPUS_PATH = "autocorpus4.txt"


def _split_into_chunks(text, size):
    """Return consecutive slices of *text*, each at most *size* characters."""
    return [text[start:start + size] for start in range(0, len(text), size)]


def _build_corpus(chunks):
    """Query the API for every chunk, pass after pass, until the corpus is full.

    Each non-empty response is prefixed with "Reader: " (the prompt ends on
    that label, so the model's reply starts mid-dialogue), printed and
    appended to CORPUS_PATH. The function returns as soon as the corpus
    file exceeds FILE_SIZE_LIMIT, or when a complete pass over all chunks
    produced no response at all, so an unreachable API cannot make the
    script loop forever.
    """
    chunk_count = len(chunks)
    with open(CORPUS_PATH, "a", encoding="utf-8") as corpus_file:
        while True:
            wrote_any = False
            # Restart the counter on every pass so the progress line never
            # reports a chunk number larger than the total.
            for index, chunk in enumerate(chunks, start=1):
                print(f"\nProcessing chunk {index} out of {chunk_count} chunks\n")
                # Pass the prompt as text; encoding it to bytes first would
                # make call_api format the bytes repr into the request.
                response = call_api(preprompt + chunk + afterprompt, "config.json")
                if not response:
                    print("Something went wrong. Please check the url and the chunk.")
                    continue

                result = "Reader: " + response
                print(result + "\n")
                corpus_file.write(result + "\n\n\n")
                # Flush so the on-disk size checked below is up to date and
                # no output is lost if the run is interrupted.
                corpus_file.flush()
                wrote_any = True

                # Returning (rather than breaking) leaves both loops at once.
                if os.path.getsize(CORPUS_PATH) > FILE_SIZE_LIMIT:
                    return

            if not wrote_any:
                print("No chunk produced a response during a full pass. Stopping.")
                return


try:
    # The rest of the pipeline works in UTF-8, so read the book the same way.
    with open(file_name, "r", encoding="utf-8") as f:
        text = f.read()
except FileNotFoundError:
    print("The file does not exist. Please check the file name and location.")
else:
    chunks = _split_into_chunks(text, CHUNK_SIZE)
    if chunks:
        _build_corpus(chunks)
    else:
        # Without this guard an empty book would spin in an endless loop.
        print("The file is empty. There is nothing to process.")