CCRss committed on
Commit
b0e47f0
·
verified ·
1 Parent(s): 3800066

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +137 -0
README.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1. Read the results file and count how often each addon was used
2
+ ```Python
3
+ import json
4
+
5
def parse_json_file(file_path):
    """Count how many result records used each addon.

    Args:
        file_path: Path to a JSON file whose top-level object contains a
            ``results`` list of dict-like records.

    Returns:
        A dict mapping every ``used_addon`` value found to the number of
        records carrying it, in first-seen order. Records that lack the
        ``used_addon`` key are counted under the string ``'None'``.

    Raises:
        KeyError: If the top-level object has no ``results`` key.
    """
    from collections import Counter

    # JSON is UTF-8 by specification; relying on the platform default
    # encoding breaks on non-ASCII (e.g. Cyrillic) addon names and text.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Group results by used_addon
    counts = Counter(item.get('used_addon', 'None') for item in data['results'])

    # Hand back a plain dict so callers see the same type as before.
    return dict(counts)
18
+
19
# Location of the batch results to summarise.
file_path = '/raid/vladimir_albrekht/projects/data_temp/results_batch_5_fine_web_edu.json'

# Tally the records per addon, then print one summary line for each.
addon_counts = parse_json_file(file_path)
for addon in addon_counts:
    count = addon_counts[addon]
    print(f"Used addon: {addon} - Count: {count}")
24
+ ```
25
+
26
+ 2. Export example records for each addon to a Word document
27
+ ```Python
28
+ import json
29
+ from docx import Document
30
+ from docx.shared import Inches
31
+
32
def extract_examples(file_path, output_file_path):
    """Write sample records for each known addon into a Word document.

    The input JSON must have a top-level ``results`` list. For every record
    whose ``used_addon`` matches one of the addons listed below, the first
    few matches are kept (two per primary addon, one per other addon) and
    their ``text`` and ``response`` fields are written to the document.

    Args:
        file_path: Path to the JSON results file to read.
        output_file_path: Path the generated ``.docx`` file is saved to.

    Raises:
        KeyError: If the top-level object has no ``results`` key.
    """
    # JSON is UTF-8 by specification; relying on the platform default
    # encoding breaks on the Cyrillic text and emoji this data contains.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    primary_addons = [
        "Make output from 'gpt' larger as possible 5000 tokens minimum in one value",
        "Use шалақазақ style where appropriate, keeping Russian words for technical terms."
    ]

    other_addons = [
        "Mark important information with emojis",
        "Add similar to this question in the end of each value 'If you want to know more about this topic, please ask me!', but in Kazakh and in different versions",
        "Ask first question in English, but other values and question keep in Kazakh",
        "Break down complex concepts into simpler terms.",
        "Include a practical application example in one answer.",
        "Define key terminology used in the context.",
        "Compare and contrast different approaches or methods.",
        "Make questions simple",
        "Explain the broader impact or significance of the topic.",
        "Present contrasting viewpoints in one answer.",
        "Include a conclusion in the final answer.",
        "Discuss practical implications for different stakeholders.",
        "Provide step-by-step explanation in one of your answers.",
        "Provide real-world case studies if applicable.",
        "Use emotional 😊 emojis in your answers",
        "Ask first question in Russian, but other values and question keep in Kazakh",
        "Use comparison elements in your answers.",
        "Try your best to make it as good as you can and I will pay you 300$",
        "Provide at least one specific example in an answer."
    ]

    # Create Word document
    doc = Document()
    for section in doc.sections:
        section.left_margin = Inches(0.5)
        section.right_margin = Inches(0.5)

    def add_numbered_list(addons):
        # One indented paragraph per addon, numbered from 1.
        for i, addon in enumerate(addons, 1):
            p = doc.add_paragraph()
            p.paragraph_format.left_indent = Inches(0.5)
            p.add_run(f"{i}. {addon}")

    def add_text_and_response(example, level):
        # TEXT / RESPONSE pair followed by a visual separator line.
        doc.add_heading("TEXT:", level=level)
        doc.add_paragraph(example['text'])
        doc.add_heading("RESPONSE:", level=level)
        doc.add_paragraph(example['response'])
        doc.add_paragraph('=' * 80)

    # Add table of contents
    doc.add_heading("Table of Contents", level=1)

    doc.add_paragraph("Primary Addons:")
    add_numbered_list(primary_addons)

    doc.add_paragraph("\nOther Addons:")
    add_numbered_list(other_addons)

    doc.add_page_break()

    # Number of examples to keep per addon. Primary entries are applied
    # last so they win if an addon were ever listed in both groups.
    quotas = {addon: 1 for addon in other_addons}
    quotas.update({addon: 2 for addon in primary_addons})
    examples = {addon: [] for addon in quotas}

    # Collect examples
    for item in data['results']:
        addon = item.get('used_addon')
        # Only string addons can match; the check also keeps unhashable
        # values away from the dict lookup.
        if not isinstance(addon, str) or addon not in quotas:
            continue
        if len(examples[addon]) < quotas[addon]:
            examples[addon].append({
                'text': item.get('text', ''),
                'response': item.get('response', ''),
            })

    # First add primary addons with 2 examples each
    doc.add_heading("PRIMARY ADDONS (2 examples each):", level=1)
    for addon in primary_addons:
        doc.add_heading(f"Addon: {addon}", level=1)
        for i, example in enumerate(examples[addon], 1):
            doc.add_heading(f"Example {i}:", level=2)
            add_text_and_response(example, level=3)
        doc.add_page_break()

    # Then add other addons with 1 example each
    doc.add_heading("OTHER ADDONS (1 example each):", level=1)
    for addon in other_addons:
        # Addons with no matching record are left out of this section.
        if examples[addon]:
            doc.add_heading(f"Addon: {addon}", level=1)
            add_text_and_response(examples[addon][0], level=2)
            doc.add_page_break()

    doc.save(output_file_path)
132
+
133
+ # Usage
134
# Source results file and the Word document to generate from it.
input_file = '/raid/vladimir_albrekht/projects/data_temp/results_batch_5_fine_web_edu.json'
output_file = 'all_addon_examples.docx'

# Build the document with example records for every addon.
extract_examples(file_path=input_file, output_file_path=output_file)
137
+ ```