import os r = [] lr = [] l = 0 for root, dirs, files in os.walk('.'): for x in root: if x == '\\': l += 1 r.append(root) lr.append(l) l = 0 print('Самым глубоким каталогом является: ') print(r[lr.index(max(lr))], ' --- ', max(lr), 'уровня') import os import re l = 0 folder = [] p = re.compile(r"[0-9]+", re.U) for root, dirs, files in os.walk('.'): for d in dirs: if p.search(d): folder.append(d) l += 1 print('Всего найдено {} папок с цифрами в названии.'.format(l)) print('\nВот они:') for x in folder: print(x) import re file = input("Какой файл открыть (введите путь к файлу)? ") with open(file, 'r', encoding = 'utf-8') as f: i = (len(re.findall(r'\b.+ing\b', f.read()))) print(i) import re file = input("Какой файл открыть (введите путь к файлу)? ") def open_file(text): with open(file, 'r', encoding = 'utf-8') as f: result = len(re.findall(r'\b.+ing\b', f.read())) print(result) import os import re l = 0 folder = [] p = re.compile(r"[0-9]+", re.U) for root, dirs, files in os.walk('.'): for d in dirs: if p.search(d): folder.append(d) l += 1 print('Всего найдено {} папок с цифрами в названии.'.format(l)) print('\nВот они:') for x in folder: print(x) import re file = open('insects.txt', 'r', encoding = 'utf-8') f = file.read() delete = re.sub('<.*?>', '', f, flags = re.DOTALL) change = 'комар((а(ми|х)?)|и|ы|о(в|м)|у|е)?([\s,.!\?:"\(\)\'»])' Change = 'Комар((а(ми|х)?)|и|ы|о(в|м)|у|е)?([\s,.!\?:"\(\)\'»])' m = re.sub(change, 'слон\\1\\2', delete) m = re.sub(Change, 'Слон\\1\\2', m) with open('elephants.txt', 'w', encoding = 'utf-8') as f: f.write(m) a = [] word = input('enter a word:') while word: a.append(word) word = input('enter a word:') for w in a: if len(str(w)) > 5: print(w) s = input('enter a word:') for i in range(len(s)): print(s[:i+1]) file = open('text.txt','r',encoding='utf-8') length1 = 0 length3 = 0 for word in file: if len(word) == 1: length1 += 1 elif len(word) == 3: lenght3 += 1 if length1 == 0: print('No words with length of 1 symbol') elif length3 == 0: 
print('No words with length of 3 symbols') else: print('In file '+str(length3/length1)+' times more words of length 3 than of words of length 1') file.close() print('Введите три числа:') a = int(input()) b = int(input()) c = int(input()) s = 0 if a + b == c: print('Число',c,'равно сумме первых двух чисел.') else: print('Число',c,'не равно сумме первых двух чисел.') if a * c + b == 0: x = c print('Число',c,'является решением линейного уравнения a * x + b = 0, где a - первое число, b - второе число.') else: print('Число',c,'не является решением линейного уравнения a * x + b = 0, где a - первое число, b - второе число.') import re with open('text.txt', 'r', encoding='utf-8') as f: exclude = '' f.readline() i = 0 for line in f: if exclude in line: break else: i+=1 with open('numbers_of_lines.txt', 'w', encoding = 'utf-8') as m: m.write(str(i)) import re with open ('text.txt', 'r', encoding = 'utf-8') as f: text = f.read() result = re.finditer('откр(ы((т(ый|ь))|л(а|о|и)?|в(ший?)?)|о(й(те|ся)?|ют?|е(шь|те?|м)))', text) for match in result: print(match.group()) import random def noun(syllables): file = open('nouns_' + syllables + '.txt','r', encoding = 'utf-8') text = file.read() verbs = text.split('\n') return random.choice(verbs) def verb(syllables): file = open('verbs_' + syllables + '.txt','r', encoding = 'utf-8-') text = file.read() verbs = text.split('\n') return random.choice(verbs) def punctuation(): marks = ['.', ',', '?', '!', '', '-', '...', '?!'] return random.choice(marks) def stroka_5(): return noun('2') + ' ' + verb('3') + punctuation() def stroka_7(): return noun('4') + ' ' + verb('3') + punctuation() def create_poem(): return stroka_5() + '\n' + stroka_7() + '\n' + stroka_5() print(create_poem()) import csv def lets_play(): words = {} with open('words.csv', 'r', encoding='utf-8') as f: text = csv.reader(f, delimiter=',') for row in text: words[row[0]] = row[1] n = 0 keys = list(words.keys()) while n < len(words): i = 0 while i <= len(words): if i < 
len(words): resp = input(keys[n] + ' ') if resp == words[keys[n]]: print('You rock!') n+=1 break else: print('No. You have ' +str(len(words[keys[n]]))+' more guesses.') i+=1 else: print('Sorry, but you have run out of guesses. The right answer is '+keys[n]+' '+words[keys[n]]) n+=1 c = lets_play() import xml.etree.ElementTree as ET import glob, os f = open('cout_of_sentences', 'w') for file in glob.glob("*.xhtml"): print(file) tree = ET.parse(file) root = tree.getroot() i=0 for word in root.iter('se'): i=i+1 f.write(file+"\t"+str(i)+"\n") print (i) f.close() import xml.etree.ElementTree as ET import csv import glob, os csvfile = open('info.csv', 'w') with csvfile: fieldnames = ['Название файла', 'Автор', 'Тематика текста'] writer = csv.DictWriter(csvfile, fieldnames=fieldnames,delimiter=';') writer.writeheader() for file in glob.glob("*.xhtml"): tree = ET.parse(file) root = tree.getroot() author='' topic='' for meta in root.iter('meta'): if (meta.attrib['name']=='author'): author=meta.attrib['content'] if (meta.attrib['name']=='topic'): topic=meta.attrib['content'] writer.writerow({'Название файла': file, 'Автор': author,'Тематика текста':topic}) f = open('one.txt', 'r', encoding = 'utf-8') maks = 1; mini = 999999; for line in f: words = line.split() if len(words) != 0: s = len(words) - 1 for each in words: s += len(each) if s > maks: maks = s if s < mini: mini = s print(maks/mini) f.close() w = input('Спрашивает у пользователя слово (в кириллице) ') t = len(w) - 1 print('Выводит на экран нечётные буквы этого слова (но только если это буква "о", буква "п" или буква "е"\n') while t>=0: if (t%2==0) and ((w[t]=="о") or (w[t]=="п") or (w[t]=="е")): print('"', w[t], '" ') t-=1 import random vowels = "АЕЁИОУЫЭЮЯаеёиоуыэюя" stressed_vowels = "АЕЁИОУЫЭЮЯ" non_stressed_vowels ="аеёиоуыэюя" strng = '' def word_read(): f = open("stressed_out.txt", 'r', encoding = "utf-8") f2 = f.read() f.close() words = f2.split('\n') return words def word_info_vowels(words): vowel_info = [] 
for indx, word in enumerate(words): vowel_info.append(0) for letter in word: if letter in vowels: vowel_info[indx] += 1 return vowel_info def word_info_stresses(words): stress_info = [] for indx, word in enumerate(words): stress_info.append(0) vow = 0 for letter in word: if letter in vowels: vow += 1 if letter in stressed_vowels: stress_info[indx] = vow return stress_info def strng_tv(): t_v = 0 for letter in strng: if letter in stressed_vowels: t_v = 0 if letter in non_stressed_vowels: t_v += 1 return t_v def strng_tsv(): t_s_v = 0 for letter in strng: if letter in stressed_vowels: t_s_v += 1 return t_s_v def new_word_in_line(words, vowel_info, stress_info): global strng t_v = strng_tv() t_s_v = strng_tsv() if (t_v == 0) and (t_s_v == 0): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 1) and (vowel_info[n] < 4): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 0) and ((t_s_v == 1) or (t_s_v == 2)): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 3) and (vowel_info[n] < 6): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 0) and (t_s_v == 3): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 3) and (vowel_info[n] == 5): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 1) and ((t_s_v == 1) or (t_s_v == 2)): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 2) and (vowel_info[n] < 5): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 1) and (t_s_v == 3): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 2) and (vowel_info[n] == 4): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) 
and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 2) and ((t_s_v == 1) or (t_s_v == 2)): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 1) and (vowel_info[n] < 4): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 2) and (t_s_v == 3): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 1) and (vowel_info[n] == 3): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " def create_line(words, vowel_info, stress_info): global strng strng = "" for n in range(4): new_word_in_line(words, vowel_info, stress_info) new_word_in_line(words, vowel_info, stress_info) def create_txt(words, vowel_info, stress_info): global strng for n in range(4): create_line(words, vowel_info, stress_info) print(strng.lower().capitalize()) def main(): words = word_read() vowel_info = word_info_vowels(words) stress_info = word_info_stresses(words) create_txt(words, vowel_info, stress_info) main() symbols = '\'"/.,<>:;[]{}\\|1234567890`~!@ capital_letters = 'QWERTYUIOPASDFGHJKLZXCVBNMЙЦУКЕНГШЩЗХЪЁФЫВАПРОЛДЖЭЯЧСМИТЬБЮ' template = 'Предложение № {0}, слово с заглавной буквы № {1} - это "{2}".\n' def open_file(): f = open("oister.txt", 'r', encoding = "utf-8") st = f.read() f.close() return st def clean(st): arr_dirty = st.replace('.,!?…', '.').split('.') arr_cleaner = [sentence.replace(symbols, '') for sentence in arr_dirty] arr_less_10 = [sentence for sentence in arr_cleaner if len(sentence.split()) >= 10] arr = [sentence.split() for sentence in arr_cleaner] return arr def find_capitals(arr): for indx, sentence in enumerate(arr): i = 0 for word in sentence: if word[0] in capital_letters: i += 1 print(template.format(indx + 1, i, word)) def main(): st = open_file() arr = clean(st) find_capitals(arr) main() import random def open_file(): f = open("words_with_explanations.csv", 
'r', encoding = "utf-8") arr = f.readlines() f.close() return arr def mapish(arr): mp = {} for line in arr: words = line.split(";") mp[words[0]] = words[1].replace('\n', '') return mp def game(mp): answ = input('Хотите сыграть - введите "да" ') while answ == 'да': word = random.choice(list(mp.keys())) st = "" for letter in mp[word]: st += "*" print(word, " ", st) t = False while t == False: guess = input("Ваша версия ") if guess == mp[word]: t = True print("Да!!! Ура!!!") else: print("Нет(") answ = input('Хотите сыграть - введите "да" ') def main(): arr = open_file() mp = mapish(arr) game(mp) main() import os import re def open_file(path): fi = open(path, 'r', encoding = "cp1251") st = fi.read() fi.close() return st def find_auth(raw): auth_arr = re.search('([а-яёА-ЯЁa-zA-Z- ]*)\..*', raw).group(1) auth = '' for l in auth_arr: auth += l return auth def find_date(raw): date_arr = re.search('.*, ([0-9.]*)', raw).group(1) date = '' for l in date_arr: date += l return date def all_in_all(): auth = [] date = [] file_all = [] path = 'news' for root, dirs, files in os.walk(path): for f in files: file_all.append(f) raw = open_file(os.path.join(root, f)) auth.append(find_auth(raw)) date.append(find_date(raw)) return auth, date, file_all def wrt(stri): f = open("result.csv", 'w', encoding = "utf-8") f.write(stri) f.close() def main(): auth, date, file_all = all_in_all() stri = '' for idx, file in enumerate(file_all): stri += file stri += ',' stri += auth[idx] stri += ',' stri += date[idx] stri += '\n' wrt(stri) main() punct = '\'"/.,<>:;[]{}\\|1234567890`~!@ import os import re def open_file(path): fi = open(path, 'r', encoding = "cp1251") st = fi.read() fi.close() return st def norm_txt(st): st_clean = st.replace('', '') st_clean = st_clean.replace('', '\n') st_clean = st_clean.replace('', '') st_clean = st_clean.replace('\n', '') st_clean = st_clean.replace('', '') st_clean = st_clean.replace('', ' ') st_clean = re.sub('', '', st_clean) st_clean = re.sub('<.*>\n', '', 
st_clean) st_clean = st_clean.replace(' ', ' ') return st_clean def count_words(f): num = 0 st = open_file(f) st_clean = norm_txt(st) arr_word = [] for word in st_clean.split(): if word.strip(punct) != '': arr_word.append(word.strip(punct)) num = len(arr_word) return num def all_in_all(): arr = [] file_all = [] path = 'news' for root, dirs, files in os.walk(path): for f in files: arr.append(count_words(os.path.join(root, f))) file_all.append(f) return arr, file_all def wrt(stri): f = open("result.txt", 'w', encoding = "utf-8") f.write(stri) f.close() def main(): arr, file_all = all_in_all() stri = '' for idx, file in enumerate(file_all): stri += file stri += '\t' stri += str(arr[idx]) stri += '\n' wrt(stri) main() import os cyrillic_symbols = '\'"/.,<>:;[]{}\\|1234567890` ~!@ def go_around(): q = 0 for root, dirs, files in os.walk('.'): for d in dirs: T = 0 for letter in d: if letter not in cyrillic_symbols: T += 1 if T == 0: q += 1 print(q) def main(): go_around() main() f = open('one.txt', 'r', encoding = 'utf-8') maks = 1; mini = 999999; for line in f: words = line.split() if len(words) != 0: s = len(words) - 1 for each in words: s += len(each) if s > maks: maks = s if s < mini: mini = s print(maks/mini) f.close() from random import randint def if_a_needed_num_or_not_that_much(x): tr = False for num in range (1, 101): if x == str(num): tr = True return tr t = False while 4 < 5: if t == False: a = randint(0, 100) + 1 t = True print('\n\n\nI\'ve chosen a number from 1 to 100, could you guess it? ') s = input('\nPut a number in here, \nTo stop the game press enter, to restart enter \'I give up\'\n') if s == '': break elif s == 'I give up' or s == 'i give up' or s == 'give up' or s == 'GO FUCK YOURSELF WITH SUCH A NUMBER' or s == '\'I give up\'': print('My number was ', a) t = False continue elif if_a_needed_num_or_not_that_much(s) == False: print('OOO!!! I\'m afraid that something has gone terribly wrong! 
Excuse me...') continue i = int(s) if a == i: t = False print('WELL DONE!!! ') elif a > i: print('My number is bigger. Try again \n') else: print('My number is smaller. Try again \n ')from random import randint def if_a_needed_num_or_not_that_much(x): tr = False for num in range (1, 101): if x == str(num): tr = True return tr t = False while 4 < 5: if t == False: a = randint(0, 100) + 1 t = True print('\n\n\nI\'ve chosen a number from 1 to 100, could you guess it? ') s = input('\nPut a number in here, \nTo stop the game press enter, to restart enter \'I give up\'\n') if s == '': break elif s == 'I give up' or s == 'i give up' or s == 'give up' or s == 'GO FUCK YOURSELF WITH SUCH A NUMBER' or s == '\'I give up\'': print('My number was ', a) t = False continue elif if_a_needed_num_or_not_that_much(s) == False: print('OOO!!! I\'m afraid that something has gone terribly wrong! Excuse me...') continue i = int(s) if a == i: t = False print('WELL DONE!!! ') elif a > i: print('My number is bigger. Try again \n') else: print('My number is smaller. Try again \n ') def open_file(): f = open("Austen Jane. 
Pride and Prejudice.txt", 'r', encoding = "utf-8") text = f.read() f.close() return text def clean(st): arr = st.split() for idx, word in enumerate(arr): arr[idx] = word.strip('\'"/.,<>:;[]{}\\|1234567890`~!@ arr[idx] = arr[idx].lower() return arr def find_ness(arr): new_arr = [] how_many = [] for word in arr: if word.endswith("ness") == True: if word in new_arr: for idx, each in enumerate(new_arr): if each == word: how_many[idx] += 1 else: new_arr.append(word) how_many.append(1) st = "" for word in new_arr: st += word st += " " return st, new_arr, how_many def find_max(arr, freq_arr): t = 0 for number in freq_arr: if number > t: t = number st = '' for idx, each in enumerate(arr): if freq_arr[idx] == t: st += each st += " " return st def main(): text = open_file() arr = clean(text) st1, new_arr, how_many = find_ness(arr) st2 = find_max(new_arr, how_many) print(st1) print(st2) main() import random vowels = "АЕЁИОУЫЭЮЯаеёиоуыэюя" stressed_vowels = "АЕЁИОУЫЭЮЯ" non_stressed_vowels ="аеёиоуыэюя" strng = '' def word_read(): f = open("stressed_out.txt", 'r', encoding = "utf-8") f2 = f.read() f.close() words = f2.split('\n') return words def word_info_vowels(words): vowel_info = [] for indx, word in enumerate(words): vowel_info.append(0) for letter in word: if letter in vowels: vowel_info[indx] += 1 return vowel_info def word_info_stresses(words): stress_info = [] for indx, word in enumerate(words): stress_info.append(0) vow = 0 for letter in word: if letter in vowels: vow += 1 if letter in stressed_vowels: stress_info[indx] = vow return stress_info def strng_tv(): t_v = 0 for letter in strng: if letter in stressed_vowels: t_v = 0 if letter in non_stressed_vowels: t_v += 1 return t_v def strng_tsv(): t_s_v = 0 for letter in strng: if letter in stressed_vowels: t_s_v += 1 return t_s_v def new_word_in_line(words, vowel_info, stress_info): global strng t_v = strng_tv() t_s_v = strng_tsv() if (t_v == 0) and (t_s_v == 0): d = 0 while d == 0: n = random.randint(0, len(words) - 
1) if (stress_info[n] == 1) and (vowel_info[n] < 4): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 0) and ((t_s_v == 1) or (t_s_v == 2)): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 3) and (vowel_info[n] < 6): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 0) and (t_s_v == 3): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 3) and (vowel_info[n] == 5): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 1) and ((t_s_v == 1) or (t_s_v == 2)): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 2) and (vowel_info[n] < 5): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 1) and (t_s_v == 3): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 2) and (vowel_info[n] == 4): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 2) and ((t_s_v == 1) or (t_s_v == 2)): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 1) and (vowel_info[n] < 4): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " if (t_v == 2) and (t_s_v == 3): d = 0 while d == 0: n = random.randint(0, len(words) - 1) if (stress_info[n] == 1) and (vowel_info[n] == 3): strng += words[n] strng += " " d += 1 if (stress_info[n] == 0) and (vowel_info[n] == 0): strng += words[n] strng += " " def create_line(words, vowel_info, stress_info): global strng strng = "" for n in range(4): new_word_in_line(words, vowel_info, stress_info) new_word_in_line(words, vowel_info, stress_info) def 
create_txt(words, vowel_info, stress_info): global strng for n in range(4): create_line(words, vowel_info, stress_info) print(strng.lower().capitalize()) def main(): words = word_read() vowel_info = word_info_vowels(words) stress_info = word_info_stresses(words) create_txt(words, vowel_info, stress_info) main() word = input('введите что ли слово ') string = "" for letter in word: string += letter print(string) import re def open_file(): f = open("isl.xml", 'r', encoding = "utf-8") st = f.read() f.close() return st def count_lines(st): n = 0 for each in st: if (each == "\n"): n += 1 return n + 1 def wrt(n): f = open('new.txt', 'w') f.write(str(n) + '\n') f.close() def main(): st = open_file() n = count_lines(st) wrt(n) main() import re def open_file(): f = open("isl.xml", 'r', encoding = "utf-8") st = f.read() f.close() return st def create_dic(st): dic = {} reg = '()(.*?)()' m = re.findall(reg, st) for exp in m: if exp[3] in dic: dic[exp[3]] += 1 else: dic[exp[3]] = 1 return dic def wrt(dic): f = open('new.txt', 'w') for each in dic: f.write(each) f.close() def main(): st = open_file() dic = create_dic(st) wrt(dic) main() import re def open_file(): f = open("isl.xml", 'r', encoding = "utf-8") st = f.read() f.close() return st def create_dic(st): dic = {} reg = '()(.*?)()' m = re.findall(reg, st) for exp in m: if exp[3] in dic: dic[exp[3]] += 1 else: dic[exp[3]] = 1 return dic def wrt(dic): f = open('new.txt', 'w') for each in dic: f.write(each) f.close() def main(): st = open_file() dic = create_dic(st) wrt(dic) main() import re def open_file(): f = open("SPB.html", 'r', encoding = "utf-8") st = f.read() f.close() return st def find_names(st): exp = '(">)(UTC)([0-9+-]*?)()' arr = re.findall(exp, st) return arr def clean_res(arr): new_arr = [] for each in arr: res = each[1] + each[2] if res not in new_arr: new_arr.append(res) return new_arr def wrt(arr): f = open('new.txt', 'w') for time in arr: f.write(time + '\n') f.close() def main(): st = open_file() arr = 
find_names(st) new_arr = clean_res(arr) wrt(new_arr) main() import os numbers = '1234567890' def list_of_folders(): files = {} for f in os.listdir(): if f in files: files[f] += 1 else: files[f] = 1 return files def no_numbers(files): clean_files = {} for f in files: t = 0 for number in numbers: if number in f: t += 1 if f in clean_files and t == 0: clean_files[f] += 1 elif f not in clean_files and t == 0: clean_files[f] = 1 return clean_files def print_out(clean_files, files): n = 0 for f in clean_files: n += 1 print(n) for fi in files: print(fi) def main(): files = list_of_folders() clean_files = no_numbers(files) print_out(clean_files, files) main() import re def open_file(): f = open("oister.txt", 'r', encoding = "utf-8") st = f.read() f.close() return st def clean(st): arr_dirty = st.split() for idx, word in enumerate(arr_dirty): arr_dirty[idx] = word.strip('\'"/.,<>:;[]{}\\|1234567890`~!@ arr = [] for each in arr_dirty: if (each != ""): st2 = " " + each + " " arr.append(st2) return arr def make_st(arr): st = '' for word in arr: if word != "": st += word st += " " return st def find_find(arr): exp = ' на(й|ш)(т|д|е|ё|л)(и|у|е|ё|л|д|я|а|о)(т|ш|м|н)?(е|ь|н|а|о|ы|ий|ие|ая|ее)?(ый|ая|ое|ые)?(ся|сь)? 
' for word in arr: if re.search(exp, word): print(word) def main(): st = open_file() arr = clean(st) find_find(arr) main() import os import shutil def remove(folder): for root, dirs, files in os.walk(folder): for f in files: os.remove(f) for d in dirs: os.rmdir(d) os.rmdir(folder) remove('C:\\Users\\student\\Desktop\\papka\\pp') нужно как-тр делать через os.path.join Хождение по папкам for root, dirs, files in os.walk('.'): print(root, dirs) путь к файлу чтобы его открыть os.path.join(root, fname) file_tree = os.walk('.') for d in file_tree: print(d) for root, dirs, files in os.walk('.'): print(root) - идем в по папкам, если в одной из папок есть еще папки - сначала идем вглубь, затем к следующим папкам можно идти снизу вверх for root, dirs, files in os.walk('.', topdown=False): print(root) смотрим файлы кортеж(tuple) работает почти как массив, но - пишется в круглых скобках: t = (1,2,3) - его нельзя изменять после создания: t[1] = 4 - так нельзя кортеж можно использовать в кач-ве ключа словаря: dic[(1,2,3)] = r'питон' - кладем в словарь элементы и даем значение питон dic[[1,2,3]] = r'питон' - так нельзя потому что кортеж hashable, а массив нет можно сделать частотный словарь dic(r'мой', r'V'] = 20 dic(r'мой', r'RPO'] = 5 кортеж может возвращать любая функция a, b, c = func() a = func() print(a) >>> 1,2,3 print(a[1]) >>> 2 Замена m = re.sub(r'[^;]+', r'', s) Все НЕ точки с запятой меняем на пустоту в строке s в том что меняем нельзя использовать регулярные выражение re.search - возвращает объект типа match re.findall - возвращает массив кортежей re.sub - возвращает просто строку m = re.sub(r'\bкоше?к', r'собак', s) text = re.sub(r'\bКоше?к', r'Cобак', m) - второй раз меняем уже не в первой строке, а в новой, измененной ранее!!! r'\w+' значит что в строке не экранируемые символы замена повторяющихся слов m = re.sub(r'\\w+) \\1', r'\\1', s) что что заменит на что не нервничайте - будет работать плохо флаги немного меняют поведение регулярных выражений re.DOTALL: . 
значит все кроме переноса строки иногда хотим чтобы она включала /n, нам нужно написать flags = re.DOTALL все точки в регулярном выражении начнут значить любой знак и перенос строки удвоение всех слов: m = re.sub(r'\\w+', u'\\1 \\1', s) контекстное удаление: m = re.sub(r'([иео]).([иео])', [\w0-9] любая не буква или цифры от 0 до 9 убрать все html теги из документа m = re.sub(r'<.*?>', r'', s, flags = ) Привет станет Привет import re def opentext(fname): with open (fname, 'r', encoding = 'utf-8') as t: text = t.read() return text def deltag(fname): text = opentext(fname) m = re.sub(r'<.*?>', r' ', text) text = re.sub(r'\s+', r' ', m) return text def byeSteve(fname): text = deltag(fname) m = re.sub(r'Стив Джобс', r'Сабрина Маленькая Ведьма', text) print(m) def syllable(fname): opentext() def align_right(): arr = ['Kate', 'potato', 'Sasha', 'Okun', 'Валерка'] for i in arr: print('{:>10}'.format(i)) align_right() a = [1,2,3,4,5,6,7,8,9] b = [] for i in a: b.append(i**2) МОжем сделать то же самое в одну строчку new_b = [i**2 for i in a] words = ['Mary', 'John', 'Jack', 'Tim', 'Kate', 'Tom', 'Moses', 'Jesus'] new_words = [w.upper() for w in words] на выходе - MARY JOHN JAKE TIM... b = [] for i in a: if i < 10 and i%2 == 0: b.append(i**2) new_b = [i**2 for i in a if i < 10 and i%2 == 0] other_words = [w.upper() for w in words if re.search('[aAjJ]', w)] вместо массивов собираем словари d = {"корова": "му", "собака": "гав", "кот": "мяу", "свинюга": "хрю"} sounds = {d[key]: key for key in d} sounds = {d[key]: key for key in d if len(key) > 4} big = [a, new_b, words] flat = [] for arr in big: for item in arr: flat.append(item) или: flat = [item for arr in big for item in arr] s = 'Hello, world! That\'s all folks!' s.upper() s.lower() s.capitalize() s.title() template = 'Hello, {}!' template.format('John') name = 'Mary' template.format(name) template.format(input('Введите имя!!!!!!!12!1111: ')) template = 'Привет, {1} {0}! Вы, {0}, наш самый ценный клиент. 
ТОлько вам, {0} {1}, и только сегодня мы предлагаем шкурку от бананов!!!'.format('ПЕтя', 'Иванов') arr = [21, 45, 100, 4, 5, 6, 6, 99] template = 'Возраст: {:>10}' стрелочки - это выравнивание. < справа, > слева, ^ посередине. число - минимальное окно, в котором текст. через двоеточие вводим изменения в форматировании for i in arr: print(template.format(i)) '{:+>10}'.format('text') при выравнивании текст заполнится плюсиками вместо пробелов pi = 3.14159265358979323 'Ваше число {:.2f}'.format(pi) f означает что число дробное и берем два знака после запятой, а не просто два знака '{:+>10}'.format('эйяфладлайяокудль') import re def opentext(fname): with open (fname, 'r', encoding = 'utf-8') as t: text = t.read() return text def finddata(fname): text = opentext(fname) reg = '' if re.search(reg, text): card = re.search(reg, text).group() profreg = 'Преподаватели(.|\n)*?

(.+?)<' if re.search(profreg, card): number = re.search(profreg, card).group(2) with open ('data.txt', 'w', encoding = 'utf-8') as t: t.write(number) else: print('Информации о преподавателях нет') with open ('data.txt', 'w', encoding = 'utf-8') as t: t.write('Информации о преподавателях нет') else: print('В данной статье нет инфобокса') with open ('data.txt', 'w', encoding = 'utf-8') as t: t.write('В данной статье нет инфобокса') def main(): text = input('Введите название файла: ') finddata(text) if __name__ == '__main__': main() import os import shutil import re def findext(): d = {} ext = '(.*\.)(.*)' for root, dirs, files in os.walk('.'): for f in files: if re.search(ext, f).group(2) not in d: d[re.search(ext, f).group(2)]= '1' else: d[re.search(ext, f).group(2)] = str(int(d[re.search(ext, f).group(2)]) + 1) return d def findmax(): d = findext() k = 0 extm = '' for key in d: if int(d[key]) > k: k = int(d[key]) extm = key elif int(d[key]) == k: extm = extm + ', ' + key print('В текущей папке и в папках, лежащих в ней, наиболее часто встречаются файлы с расширениями: ' + extm + '. 
Они встречаются ' + str(k) + ' раз.') def main(): findmax() if __name__ == '__main__': main() word = input('Введите слово ') array = [] while word: array.append(word) word = input('Введите слово ') for i in range(len(array)): newword = array[i] newword = newword[i+1 : ] print(newword) def opentext(text): words = [] with open (text, 'r', encoding = 'utf-8') as t: newtext = t.read() newtext = newtext.lower() words = newtext.split() for i in range(len(words)): words[i] = words[i].strip('”“".,!?') return words def numbhood(text): words = opentext(text) hood = [] for i in range(len(words)): if len(words[i])>4: if words[i].endswith('hood'): if words[i] not in hood: hood.append(words[i]) return hood def frequency(text, word): words = opentext(text) freq = 0 for i in range(len(words)): if words[i] == word: freq += 1 return freq text = input('Введите имя файла с английским текстом: ') hood = numbhood(text) print('В тексте нашлось', len(hood), 'существительных с суффиксом -hood.') hfreq = [] for i in range(len(hood)): hfreq.append(frequency(text, hood[i])) minfreq = [] for i in range(len(hood)): if hfreq[i] == min(hfreq): minfreq.append(hood[i]) forms = [] for i in range(len(minfreq)): forms.append(minfreq[i][0:-4]) ', '.join(forms) ', '.join(minfreq) print('Существительные с суффиксом -hood, имеющие наименьшую частотность в тексте: ' + str(minfreq)) print('Они образованы от слов: ' + str(forms)) import re def opentext(fname): with open (fname, 'r', encoding = 'utf-8') as t: text = t.read() return text def sublanguage(fname): text = opentext(fname) lang = '(язык)(и|а(ми?|х)?|у|о[мв]|е)?(\s|\.| |\?|\'|,|-|"|»|!|\(|\)|;|:)' Lang = '(Язык)(и|а(ми?|х)?|у|о[мв]|е)?(\s|\.| |\?|\'|,|-|"|»|!|\(|\)|;|:)' l = re.search(lang, text) L = re.search(Lang, text) if re.search(lang, text): text = re.sub(l.group(1), 'шашлык', text) if re.search(Lang, text): text = re.sub(L.group(1), 'Шашлык', text) return text def savenew(fname): text = sublanguage(fname) with open ('newlingua.html', 'w', 
encoding = 'utf-8') as t: t.write(text) def main(): savenew('lingua.html') if __name__ == '__main__': main() import re def opentext(text): sentences = [] with open (text, 'r', encoding = 'utf-8') as t: newtext = t.read() newtext = re.sub('\n', ' ', newtext) sentences = re.split('\?|!|\?!|\.\.\.|\.|…', newtext) sentences = [re.sub('[”“"–«»:;(),]', '', i) for i in sentences] return sentences def makewordlen(text): sentences = opentext(text) wordlen = [[i, len(i)] for s in sentences for i in s.split()] return wordlen def form(text): wordlen = makewordlen(text) template = '{}_{}' for w in range(len(wordlen)): print(template.format(wordlen[w][0], wordlen[w][1])) def main(): form('телеграмма.txt') if __name__ == '__main__': main() freq = [] conj = [] with open('text.txt','r', encoding = 'utf-8') as f: text = f.read() freq = text.split('\n') for word in freq: conj = word.split(' | ') if conj[1] == 'союз': print(word) female = [] string = ' ' words = [] ipm = 0 for word in freq: words = word.split(' ') if words[4] == 'ед' and words[5] == 'жен': female = word.split(' | ') strint += female[0] + ',' ipm += int(female[2]) print(string) print('Сумма всех ipm слов женского рода единственного числа равна', ipm , '.') newword = input('Введите слово') arr = [] r = [] while newword: arr.append(newword) newword = input('Введите слово') for word in freq: r = word.split(' | ') for newword in arr: if r[0] == newword: print(word) else: print('Слова ', i , ' нет в словаре.') import re forms = [] words = [] with open ('text.txt', 'r', encoding = 'utf-8') as t: texxt = t.read() texxt = texxt.lower() words = texxt.split() for i in range(len(words)): words[i] = words[i].strip('”“".,!?') prog = r"\b[п]рограммир(ова(в(ш((ий|ая|ее|его|ему|им|ем|ей|ую|ей)(ся)?|и(сь)?))?|ть|л(ся|[аои](сь)?)?)|у(ю(сь|(т|щ(ий|ая|ее|его|ему|им|ем|ую|ей|))(ся)?)?|е((шь|т|м)(ся)?|те(сь)?)|я(сь)?))\b" for i in range(len(words)): if re.search(prog,words[i]) != None: if words[i] not in forms: forms.append(words[i]) 
print('В тексте встретились такие формы глагола "программировать": ' + ', '.join(forms) + '.') кортеж(tuple) работает почти как массив, но - пишется в круглых скобках: t = (1,2,3) - его нельзя изменять после создания: t[1] = 4 - так нельзя кортеж можно использовать в кач-ве ключа словаря: dic[(1,2,3)] = r'питон' - кладем в словарь элементы и даем значение питон dic[[1,2,3]] = r'питон' - так нельзя потому что кортеж hashable, а массив нет можно сделать частотный словарь dic(r'мой', r'V'] = 20 dic(r'мой', r'RPO'] = 5 кортеж может возвращать любая функция a, b, c = func() a = func() print(a) >>> 1,2,3 print(a[1]) >>> 2 Замена m = re.sub(r'[^;]+', r'', s) Все НЕ точки с запятой меняем на пустоту в строке s в том что меняем нельзя использовать регулярные выражение re.search - возвращает объект типа match re.findall - возвращает массив кортежей re.sub - возвращает просто строку m = re.sub(r'\bкоше?к', r'собак', s) text = re.sub(r'\bКоше?к', r'Cобак', m) - второй раз меняем уже не в первой строке, а в новой, измененной ранее!!! r'\w+' значит что в строке не экранируемые символы замена повторяющихся слов m = re.sub(r'\\w+) \\1', r'\\1', s) что что заменит на что не нервничайте - будет работать плохо флаги немного меняют поведение регулярных выражений re.DOTALL: . 
значит все кроме переноса строки иногда хотим чтобы она включала /n, нам нужно написать flags = re.DOTALL все точки в регулярном выражении начнут значить любой знак и перенос строки удвоение всех слов: m = re.sub(r'\\w+', u'\\1 \\1', s) контекстное удаление: m = re.sub(r'([иео]).([иео])', [\w0-9] любая не буква или цифры от 0 до 9 убрать все html теги из документа m = re.sub(r'<.*?>', r'', s, flags = ) Привет станет Привет import re def opentext(fname): with open (fname, 'r', encoding = 'utf-8') as t: text = t.read() return text def deltag(fname): text = opentext(fname) m = re.sub(r'<.*?>', r' ', text) text = re.sub(r'\s+', r' ', m) return text def byeSteve(fname): text = deltag(fname) m = re.sub(r'Стив Джобс', r'Сабрина Маленькая Ведьма', text) print(m) def syllable(fname): opentext() import re lemma = r'' def openlines(fname): lines = [] with open (fname, 'r', encoding = 'utf-8') as t: lines = t.readlines() return lines def writelines(fname, text): lines = openlines(fname) with open ('lines.txt', 'w', encoding = 'utf-8') as f: f.write(str((len(lines)))) def opentext(fname): text = [] with open (fname, 'r', encoding = 'utf-8') as t: text = t.read() return text def lemm(fname): text = opentext(fname) lemmas = re.findall(lemma, text) return lemmas def freq(fname): lemmas = lemm(fname) d = {} for i in range(len(lemmas)): if lemmas[i] in d: d[lemmas[i]] += 1 else: d[lemmas[i]] = 1 return d def writekeys(fname): d = freq(fname) with open ('keys.txt', 'w', encoding = 'utf-8') as f: for key in d: f.write(key + '\n') def main(): writelines('file.xml', 'lines.txt') writekeys('file.xml') def makepuzzle(words): puzzle = {} strings = [] word = [] with open (words, 'r', encoding = 'utf-8') as w: strings = w.readlines() for i in range(len(strings)): strings[i] = strings[i].strip('\n') for i in range(len(strings)): word = strings[i].split(';') puzzle[word[0]] = word[1] return puzzle def trytoguess(): puzzle = makepuzzle('words.csv') for key in puzzle: print(puzzle[key], '...') 
guess = input('Дополните это словосочетание: ') for i in range(len(puzzle[key])): if guess == key: print("Вы угадали!!! Это" , '"' + puzzle[key] , key + '".') break guess = input('Вы не угадали, попробуйте еще раз: ') if i == len(puzzle[key]) - 1: print('Вы проиграли.') trytoguess() import re import os import shutil def number(folder): reg = '' for i in os.listdir(folder): arr = [] with open(os.path.join(folder, i), 'r', encoding = 'utf-8') as t: text = t.read() for t in re.findall(reg, text): arr.append(t) with open('sentences.txt', 'a', encoding = 'utf-8') as f: f.write(i+'\t'+str(len(arr))+'\n') def table(folder): with open('info.csv', 'w', encoding = 'utf-8') as f: f.write('Файл ; Автор ; Тема \n') for i in os.listdir(folder): auth = '' topic = '' with open(os.path.join(folder, i), 'r', encoding = 'utf-8') as t: text = t.read() for t in re.findall(auth, text): for j in re.findall(topic, text): with open('info.csv', 'a', encoding = 'utf-8') as f: f.write(i+' ; '+t+' ; '+j+'\n') def bi(folder): sen = '((.|\n)*?)' pr = '' loc = 'gr="S.*?loc">(\w*)<' sentence = '' word = '(\w*)/.' 
for i in os.listdir(folder): with open(os.path.join(folder, i), 'r', encoding = 'utf-8') as t: text = t.read() for s in re.findall(sen, text): print(s) for p in re.findall(pr, s): for l in re.findall(loc, s): for i in re.findall(word, s): sentence = sentence + i + ' ' with open('bigr.txt', 'a', encoding = 'utf-8') as f: f.write(p+' '+l+'\t' + sentence + '\n') def main(): number('news') table('news') bi('news') if __name__ == '__main__': main() import re def opentext(text): with open (text, 'r', encoding = 'utf-8') as t: text = t.read() return(text) def anawords(text): t = opentext(text) nwords = re.findall(r'', t) nana = re.findall(r'(.*?)', t) reg = '<.*=ins.*>' com = '(\w+)<' cont = [] words = [] for s in range(len(strings)): if re.search(reg, strings[s]): word = strings[s-3]+strings[s-2]+strings[s-1]+strings[s]+strings[s+1]+strings[s+2]+strings[s+3] cont.append(word) for i in cont: three = '' for j in re.findall(com, i): three = three+j+' ' words.append(three) return words def makeins(text): words = SIns(text) with open ('ins.txt', 'w', encoding = 'utf-8') as f: for w in words: seven = w.split() f.write(seven[0]+' '+seven[1]+' '+seven[2]+'\t'+seven[3]+'\t'+seven[4]+' '+seven[5]+' '+seven[6]+'\n') def main(): anawords('text.xml') makefreq('text.xml') makeins('text.xml') if __name__ == '__main__': main() a = input('Введите число a') b = input('Введите число b') c = input('Введите число c') a = int(a) b = int(b) c = int(c) if a%b==c: print('Остаток от деления a на b равен c') else: print('Остаток от деления a на b не равен c') if a*c+b==0: print('Число с является решением уравнения "ax+b=0"') else: print('Число с не является решением уравнения "ax+b=0"') input() word = input('Введите слово: ') for i in range(len(word)): newword = (word[i:] + word[:i]) print(newword) arr = [] with open('text.txt','r', encoding = 'utf-8') as t: text = t.read() text = text.replace('\n', ' ') arr = text.split(' ') len1 = 0 len3 = 0 for word in arr: if len(word) == 1: len1 += 1 elif 
len(word) == 3: len3 += 1 if len1 == 0: print('В тексте нет слов длиной в 1 символ') elif len3 == 0: print('В тексте нет слов длиной в 3 символа') else: dif = str(len3/len1) print('В тексте в ' + dif + ' раз больше слов длиной в 3 символа, чем слов длиной в 1 символ') a=input("Введите слово") a=a[::-1] i=0 for letter in a: if letter!='з'and letter!='я': print(letter) i+=1 input() import random def noun(number): if number == 's': s = [] with open('snouns.txt','r', encoding = 'utf-8') as n: snoun = n.read() s = snoun.split(' ') return random.choice(s) pl = [] with open('plnouns.txt','r', encoding = 'utf-8') as nn: plnoun = nn.read() pl = plnoun.split(' ') return random.choice(pl) def verb(numb): if numb == 's': sv = [] with open('sverbs.txt','r', encoding = 'utf-8') as v: sverb = v.read() sv = sverb.split(' ') return random.choice(sv) plv = [] with open('plverbs.txt','r', encoding = 'utf-8') as v: plverb = v.read() plv = plverb.split(' ') return random.choice(plv) def modif(): am = [] with open('modif.txt','r', encoding = 'utf-8') as m: modifier = m.read() am = modifier.split(' ') return random.choice(am) def imperative(): imp = [] with open('imperative.txt','r', encoding = 'utf-8') as i: imper = i.read() imp = imper.split(' ') return random.choice(imp) def conconj(): con = [] with open('condconj.txt','r', encoding = 'utf-8') as co: cond = co.read() con = cond.split(' ') return random.choice(con) def conjunction(): conj = [] with open('conj.txt','r', encoding = 'utf-8') as c: conjs = c.read() conj = conjs.split(' ') return random.choice(conj) def sentence1(): return noun('s') + ' ' + verb('s') + ' ' + modif() + '.' def sentence2(): return noun('pl') + ' ' + verb('pl') + ' ' + modif() + '?' def sentence3(): return conconj() + ' ' + noun('pl') + ' - ' + noun('pl') + ', ' + conjunction() + ' ' + noun('s') + ' ' + verb('s') + '.' def sentence4(): return noun('pl') + ' не ' + verb('pl') + ' ' + modif() + '.' 
def sentence5(): return noun('s') + ', ' + imperative() + ' ' + modif() + '!' def make_text(): text = 0 text = random.choice([1,2,3,4,5]) while text: if text == 1: print(sentence1()) text = random.choice([2,3,4,5]) if text == 2: print(sentence2()) text = random.choice([3,4,5]) if text == 3: print(sentence3()) text = random.choice([4,5]) if text == 4: print(sentence4()) print(sentence5()) else: print(sentence5()) print(sentence4()) break elif text == 4: print(sentence4()) text = random.choice([3,5]) if text == 3: print(sentence3()) print(sentence5()) else: print(sentence5()) print(sentence3()) break else: print(sentence5()) text = random.choice([3,4]) if text == 3: print(sentence3()) print(sentence4()) else: print(sentence4()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([2,4,5]) if text == 2: print(sentence2()) text = random.choice([4,5]) if text == 4: print(sentence4()) print(sentence5()) else: print(sentence5()) print(sentence4()) break elif text == 4: print(sentence4()) text = random.choice([2,5]) if text == 2: print(sentence2()) print(sentence5()) else: print(sentence5()) print(sentence2()) break else: print(sentence5()) text = random.choice([2,4]) if text == 2: print(sentence2()) print(sentence4()) else: print(sentence4()) print(sentence2()) break elif text == 4: print(sentence4()) text = random.choice([2,3,5]) if text == 2: print(sentence2()) text = random.choice([3,5]) if text == 3: print(sentence3()) print(sentence5()) else: print(sentence5()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([2,5]) if text == 2: print(sentence2()) print(sentence5()) else: print(sentence5()) print(sentence2()) break else: print(sentence5()) text = random.choice([2,3]) if text == 2: print(sentence2()) print(sentence3()) else: print(sentence3()) print(sentence2()) break else: print(sentence5()) text = random.choice([2,3,4]) if text == 2: print(sentence2()) text = random.choice([3,4]) if text == 3: 
print(sentence3()) print(sentence4()) else: print(sentence4()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([2,4]) if text == 2: print(sentence2()) print(sentence4()) else: print(sentence4()) print(sentence2()) break else: print(sentence4()) text = random.choice([2,3]) if text == 2: print(sentence2()) print(sentence3()) else: print(sentence3()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,3,4,5]) if text == 1: print(sentence1()) text = random.choice([3,4,5]) if text == 3: print(sentence3()) text = random.choice([4,5]) if text == 4: print(sentence4()) print(sentence5()) else: print(sentence5()) print(sentence4()) break elif text == 4: print(sentence4()) text = random.choice([3,5]) if text == 3: print(sentence3()) print(sentence5()) else: print(sentence5()) print(sentence3()) break else: print(sentence5()) text = random.choice([3,4]) if text == 3: print(sentence3()) print(sentence4()) else: print(sentence4()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([1,4,5]) if text == 1: print(sentence1()) text = random.choice([4,5]) if text == 4: print(sentence4()) print(sentence5()) else: print(sentence5()) print(sentence4()) break elif text == 4: print(sentence4()) text = random.choice([1,5]) if text == 1: print(sentence1()) print(sentence5()) else: print(sentence5()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,4]) if text == 1: print(sentence1()) print(sentence4()) else: print(sentence4()) print(sentence1()) break elif text == 4: print(sentence4()) text = random.choice([1,3,5]) if text == 1: print(sentence1()) text = random.choice([3,5]) if text == 3: print(sentence3()) print(sentence5()) else: print(sentence5()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([1,5]) if text == 1: print(sentence1()) print(sentence5()) else: print(sentence5()) print(sentence1()) break else: print(sentence5()) text = 
random.choice([1,3]) if text == 1: print(sentence1()) print(sentence3()) else: print(sentence3()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,3,4]) if text == 1: print(sentence1()) text = random.choice([3,4]) if text == 3: print(sentence3()) print(sentence4()) else: print(sentence4()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([1,4]) if text == 1: print(sentence1()) print(sentence4()) else: print(sentence4()) print(sentence1()) break else: print(sentence4()) text = random.choice([1,3]) if text == 1: print(sentence1()) print(sentence3()) else: print(sentence3()) print(sentence1()) break elif text == 3: print(sentence3()) text = random.choice([1,2,4,5]) if text == 1: print(sentence1()) text = random.choice([2,4,5]) if text == 2: print(sentence2()) text = random.choice([4,5]) if text == 4: print(sentence4()) print(sentence5()) else: print(sentence5()) print(sentence4()) break elif text == 4: print(sentence4()) text = random.choice([2,5]) if text == 2: print(sentence2()) print(sentence5()) else: print(sentence5()) print(sentence2()) break else: print(sentence5()) text = random.choice([2,4]) if text == 2: print(sentence2()) print(sentence4()) else: print(sentence4()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,4,5]) if text == 1: print(sentence1()) text = random.choice([4,5]) if text == 4: print(sentence4()) print(sentence5()) else: print(sentence5()) print(sentence4()) break elif text == 4: print(sentence4()) text = random.choice([1,5]) if text == 1: print(sentence1()) print(sentence5()) else: print(sentence5()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,4]) if text == 1: print(sentence1()) print(sentence4()) else: print(sentence4()) print(sentence1()) break elif text == 4: print(sentence4()) text = random.choice([1,2,5]) if text == 1: print(sentence1()) text = random.choice([2,5]) if text == 2: print(sentence2()) print(sentence5()) 
else: print(sentence5()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,5]) if text == 1: print(sentence1()) print(sentence5()) else: print(sentence5()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,2]) if text == 1: print(sentence1()) print(sentence2()) else: print(sentence2()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,2,4]) if text == 1: print(sentence1()) text = random.choice([2,4]) if text == 2: print(sentence2()) print(sentence4()) else: print(sentence4()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,4]) if text == 1: print(sentence1()) print(sentence4()) else: print(sentence4()) print(sentence1()) break else: print(sentence4()) text = random.choice([1,2]) if text == 1: print(sentence1()) print(sentence2()) else: print(sentence2()) print(sentence1()) break elif text == 4: print(sentence4()) text = random.choice([1,2,3,5]) if text == 1: print(sentence1()) text = random.choice([2,3,5]) if text == 2: print(sentence2()) text = random.choice([3,5]) if text == 3: print(sentence3()) print(sentence5()) else: print(sentence5()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([2,5]) if text == 2: print(sentence2()) print(sentence5()) else: print(sentence5()) print(sentence2()) break else: print(sentence5()) text = random.choice([2,3]) if text == 2: print(sentence2()) print(sentence3()) else: print(sentence3()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,3,5]) if text == 1: print(sentence1()) text = random.choice([3,5]) if text == 3: print(sentence3()) print(sentence5()) else: print(sentence5()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([1,5]) if text == 1: print(sentence1()) print(sentence5()) else: print(sentence5()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,3]) if text == 1: print(sentence1()) 
print(sentence3()) else: print(sentence3()) print(sentence1()) break elif text == 3: print(sentence3()) text = random.choice([1,2,5]) if text == 1: print(sentence1()) text = random.choice([2,5]) if text == 2: print(sentence2()) print(sentence5()) else: print(sentence5()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,5]) if text == 1: print(sentence1()) print(sentence5()) else: print(sentence5()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,2]) if text == 1: print(sentence1()) print(sentence2()) else: print(sentence2()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,2,3]) if text == 1: print(sentence1()) text = random.choice([2,3]) if text == 2: print(sentence2()) print(sentence3()) else: print(sentence3()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,3]) if text == 1: print(sentence1()) print(sentence3()) else: print(sentence3()) print(sentence1()) break else: print(sentence3()) text = random.choice([1,2]) if text == 1: print(sentence1()) print(sentence2()) else: print(sentence2()) print(sentence1()) break else: print(sentence5()) text = random.choice([1,2,3,4]) if text == 1: print(sentence1()) text = random.choice([2,3,4]) if text == 2: print(sentence2()) text = random.choice([3,4]) if text == 3: print(sentence3()) print(sentence4()) else: print(sentence4()) print(sentence3()) break elif text == 3: print(sentence3()) text = random.choice([2,4]) if text == 2: print(sentence2()) print(sentence4()) else: print(sentence4()) print(sentence2()) break else: print(sentence4()) text = random.choice([2,3]) if text == 2: print(sentence2()) print(sentence3()) else: print(sentence3()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,3,4]) if text == 1: print(sentence1()) text = random.choice([3,4]) if text == 3: print(sentence3()) print(sentence4()) else: print(sentence4()) print(sentence3()) break elif text == 
3: print(sentence3()) text = random.choice([1,4]) if text == 1: print(sentence1()) print(sentence4()) else: print(sentence4()) print(sentence1()) break else: print(sentence4()) text = random.choice([1,3]) if text == 1: print(sentence1()) print(sentence3()) else: print(sentence3()) print(sentence1()) break elif text == 3: print(sentence3()) text = random.choice([1,2,4]) if text == 1: print(sentence1()) text = random.choice([2,4]) if text == 2: print(sentence2()) print(sentence4()) else: print(sentence4()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,4]) if text == 1: print(sentence1()) print(sentence4()) else: print(sentence4()) print(sentence1()) break else: print(sentence4()) text = random.choice([1,2]) if text == 1: print(sentence1()) print(sentence2()) else: print(sentence2()) print(sentence1()) break else: print(sentence4()) text = random.choice([1,2,3]) if text == 1: print(sentence1()) text = random.choice([2,3]) if text == 2: print(sentence2()) print(sentence3()) else: print(sentence3()) print(sentence2()) break elif text == 2: print(sentence2()) text = random.choice([1,3]) if text == 1: print(sentence1()) print(sentence3()) else: print(sentence3()) print(sentence1()) break else: print(sentence3()) text = random.choice([1,2]) if text == 1: print(sentence1()) print(sentence2()) else: print(sentence2()) print(sentence1()) break make_text() модули import os import shutil Windows C:\\Users\\student\\Downloads - экранируем слэши (на маке и линуксе слеши наоборот) os.path.abspath('.') - абсолютный путь к папке, в которой я нахожусь ссейчас(точка - текущая папка) os.getcwd - то же самое os.path.join('texts','1.txt') соединяет название файла и папки os.path.exists('texts') - проверяет, есть ли такая папка или файл os.listdir('.') - возвращает массив со строками-именами файлов в папке s = 'приветки!' 
i = 1 for f in os.listdir('.'): if f.endswith('.txt'): with open(f, 'a', encoding = 'utf-8') as w: w.write(s*i) i += 1 Если в папке есть файлы txt, то в них записывается слово приветки i раз os.mkdr('folder1') - создать папку os.makedirs('a\\long\\volk\\kot') - создать папки в папке os.rename('Староеимя', 'новоеимя') - переименовать файл или папку os.path.isfile(r'texts\corpus1.txt') - проверяет, является ли то, что задано файлом (r для неэкранирования) os.path.isdir(r'texts\lalala.txt') - проверяет, является ли папкой shutil.copy('texts\\corpus1.txt', 'newcorpus\\corpus1.txt') - копирует файл из папки в другую папку shutil.copytree('texts', 'corpus') - копирует все из одной папки в новую которая создается shutil.move('texts\\lala.txt', 'corpus\\lala.txt') - перемещает файл из папки в другую папку os.remove(r'corpus\corpus2.txt') - удаляет файл shutil.rmtree('texts') - удаляет папку import os import shutil s = input('Введите предложение на англ языке: ') s_name = s.replace(' ', '\\') os.makedirs(s_name) import os import shutil n = int(input('Введите число: ')) for i in range(1, n+1): os.mkdir(str(i)) for j in range(1, i+1): with open(str(i)+'\\'+str(j) + '.txt', 'w', encoding="utf-8") as f: f.write(' ') import re import os def dirs(): kir = '[А-Яа-яЁё]*' stuff = '[A-Za-z\.\?!0-9"@№;%:?*_()-+= names = [] for f in os.listdir('.'): if os.path.isdir(f) and re.search(kir, f) and re.search(stuff, f) == None: names.append(f) return names def answer(): names = dirs() if names == []: print("В текущей папке нет папок, название которых состоит только из кириллических символов.") else: print("В текущей папке " + str((len(names))) + " папок, название которых состоит только из кириллических символов.") def allfiles(): files = [] name = '.*\.' 
for f in os.listdir('.'): if os.path.isdir(f) and f not in files: files.append(f) elif os.path.isfile(f) and re.search(name, f): n = re.search(name, f).group(0) n = n.strip('.') if n not in files: files.append(n) print("Вот названия всех найденных в текущей папке файлов и папок: ") for f in files: print(f) def main(): answer() allfiles() if __name__ == '__main__': main() import re from math import log punct = '[.,!«»?&@"$\[\]\(\):;% def preprocessing(text): text_wo_punct = re.sub(punct, '', text.lower()) words = text_wo_punct.strip().split() return words import os anek = '' teh = '' izvest = '' for root, dirs, files in os.walk('texts'): for f in files: if 'anekdots' in root: num_anek = len(files) anek += open(os.path.join(root, f), encoding='utf-8').read() elif 'izvest' in root: num_izvest = len(files) izvest += open(os.path.join(root, f), encoding='utf-8').read() elif 'teh_mol' in root: num_teh = len(files) teh += open(os.path.join(root, f), encoding='utf-8').read() words_anek = preprocessing(anek) words_teh = preprocessing(teh) words_izvest = preprocessing(izvest) words = words_anek + words_teh + words_izvest def freq_dict(arr): dic = {} for element in arr: if element in dic: dic[element] += 1 else: dic[element] = 1 return dic corpus_freq = freq_dict(words) anek_freq = freq_dict(words_anek) izvest_freq = freq_dict(words_izvest) teh_freq = freq_dict(words_teh) def pmi_for_cats(x, y): if y == 'anek': dic = anek_freq num = num_anek elif y == 'teh': dic = teh_freq num = num_teh elif y == 'izvest': dic = izvest_freq num = num_izvest p_xy = dic[x]/len(dic) p_x, p_y = corpus_freq[x]/len(corpus_freq), num/(num_izvest + num_teh + num_anek) pmi = log(p_xy/(p_x * p_y)) return pmi cat_pmi = {} i = 0 for word in corpus_freq: if i > 100: break try: pmi_anek = pmi_for_cats(word, 'anek') except KeyError: pmi_anek = 0 try: pmi_teh = pmi_for_cats(word, 'teh') except KeyError: pmi_teh = 0 try: pmi_izvest = pmi_for_cats(word, 'izvest') except KeyError: pmi_izvest = 0 max_pmi = 
max(pmi_anek, pmi_teh, pmi_izvest) if max_pmi == 0: continue if max_pmi == pmi_anek: cat = 'anek' elif max_pmi == pmi_teh: cat = 'teh' elif max_pmi == pmi_izvest: cat = 'izvest' print(word, cat) i += 1 import re from math import log punct = '[.,!«»?&@"$\[\]\(\):;% def preprocessing(text): text_wo_punct = re.sub(punct, '', text.lower()) words = text_wo_punct.strip().split() return words with open('news.txt', 'r', encoding='utf-8') as f: words = preprocessing(f.read()) word_freq = {} for word in words: if word in word_freq: word_freq[word] += 1 else: word_freq[word] = 1 bigrams = [] for ind in range(1, len(words) - 1): bigrams.append(' '.join([words[ind - 1], words[ind]])) bigram_freq = {} for b in bigrams: if b in bigram_freq: bigram_freq[b] += 1 else: bigram_freq[b] = 1 def count_pmi(x, y): p_xy = bigram_freq[' '.join([x, y])]/len(bigram_freq) p_x, p_y = word_freq[x]/len(word_freq), word_freq[y]/len(word_freq) pmi = log(p_xy/(p_x * p_y)) return pmi pmi = {} for bigr in bigrams: x, y = bigr.split() pmi[bigr] = count_pmi(x, y) i = 0 for bigram in sorted(pmi, key = lambda m: -pmi[m]): if i > 100: break print(bigram, pmi[bigram]) i += 1 a = int(input()) b = int(input()) c = int(input()) if a * b == c: print ('Обладает 1 свойством') if a / b == c: print ('Обладает 2 свойством') else: print ('Не обладает свойствами') import random d = {'Алые': 'Паруса', 'Пластмассовый': 'Мир', 'Белые': 'Розы', 'Синее': 'Море', 'Черный': 'Передел'} key_list = [] for key in d: key_list.append(key) print("Подсказка: ") k = random.randint(1,5) print(d[key_list[k]]) count = 0 while(input() != key_list[k]): count+=1 print("Попытка номер ",count) count = count + 1 print("Ура, вы выиграли!") print(count) import re def search(): with open('F.xml', 'r', encoding = 'utf - 8') as f: k = 5 for line in f: if '' in line: break elif '' not in line: k = k + 1 print(k) f.close() f = open('number.txt', 'w',encoding = 'utf-8') k = str(k) f.write(k) f.close() search() def dictn(): d={} with open('F.xml', 
'r', encoding = 'utf - 8') as f: for line in f: n = re.findall('type="*"', f) d = {n} if(key in d): d[key] = d[key]+1 else: d[key]=1 print(d) dictn() import re pattern = r'[с][ъ][е][^\s.]*' with open('word.txt', encoding='UTF-8') as file: for row in file: find = re.findall(pattern, row) for i in (len(find)): print('Найденные элементы') print(elem[i]) import os from os.path import isfile def search(): folder = 'C:/Users/Тимур/AppData/Local/Programs/Python/Python35-32' k = 0 print(os.listdir(folder)) for f in os.listdir(folder): if not isfile(f): if '_' in f: k = k + 1 print('file: ', f) if ' ' in f: k = k + 1 print('file: ', f) print(k) search() a = [] while True: word = input('Add a word') if word ==('') :break elif word.endswith ('tur'): a.append(word) print ('\n'. join(a)) import re import os import csv def open_file_tree(): names = {} file_tree = os.walk('news') for root, dirs, files in os.walk('news'): for f in files: with open(os.path.join(root,f), 'r') as p: texts = p.readlines() se_num = 0 for text in texts: if '/se' in text: se_num = se_num + 1 names[f] = se_num create_txt(names) def create_txt(dict): new = "" with open("result.txt", "w", encoding="utf-8") as file: for i in dict.keys(): new += "\n" + i.strip() + "\t" + str(dict[i]) file.write(new) def create_table(): data1 = u"FILENAME" + 'AUTHOR' + 'DATE' with open('result2.csv', 'w') as file: file.write(data1) def change_table(file,auth,date): data = "\n" + file + auth + date with open('result2.csv', 'a') as file: file.write(data) def table(): date = '' author = '' create_table() for root, dirs, files in os.walk('news'): for f in files: with open(os.path.join(root, f), 'r') as p: oneline = p.read() for i in re.finditer(r"", oneline): author = i.group(1) for j in re.finditer(r"", oneline): date = j.group(1) change_table(f, author, date) open_file_tree() table() import re import os import csv def open_file_tree(): names = {} file_tree = os.walk('news') for root, dirs, files in os.walk('news'): for f in 
files: with open(os.path.join(root,f), 'r') as f0: texts = f0.readlines() se_num = 0 for text in texts: if '/se' in text: se_num = se_num + 1 names[f] = se_num create_txt(names) def create_txt(dict): new = "" with open("result.txt", "w", encoding="utf-8") as file: for i in dict.keys(): new += "\n" + i.strip() + "\t" + str(dict[i]) file.write(new) open_file_tree() import re def replace(): with open('dino.html', 'r', encoding = 'utf-8') as f: article = f.read() f.close() m = re.sub(u'динозавр?', u'кот', article) m = re.sub(u'Динозавр?', u'Кот', m) n = re.sub(u'<.*?>', u'', m, flags = re.U) print(n) f = open('text.txt', 'w',encoding = 'utf-8') f.write(n) f.close replace() word = input('Введите слово : ') for i in range (len(word)): print(word[-i-1::]) import os from os.path import isfile def search(): folder = 'C:/Users/Тимур/AppData/Local/Programs/Python/Python35-32' p = 0 names = ['test'] print(os.listdir(folder)) for files in os.walk(folder): for f in files: for i in range(p): if name[i] != (f.split('.')[0]): names.append(f.split('.')[0]) p += 1 print(p) for name in names: print(name) search() def name(title): with open (title+'.txt', 'r', encoding ='utf-8') as f: text=f.read() words=text.split(' ') return words def ous(): words = name("title") p = 0 k = 0 for i, word in enumerate (words): if words[i].count("ous"): p = p + len(words[i]) k = k + 1 print(p / k) ous() f = open('C:\\Users\\Тимур\\Desktop\\text.txt', 'r') k = 0 l = [line.strip() for line in f] p = str (l) x=p.count(" ") x=x+1 p=p.split() for elem in p: if len(elem) > 10: k += 1 percent = k / x * 100 print(percent) import random def noun(): file = open('noun.txt', 'r', encoding = 'utf8') f = file.read() nouns = f.split('\n') return random.choice(nouns) def conjunction(): file = open('conjunction.txt', 'r', encoding = 'utf8') f = file.read() conjunctions = f.split('\n') return ", "+random.choice(conjunctions) def adjective(): file = open('adjective.txt', 'r', encoding = 'utf8') f = file.read() adjectives 
= f.split('\n') return random.choice(adjectives) def verb(): file = open('verb.txt', 'r', encoding = 'utf8') f = file.read() verbs = f.split('\n') return random.choice(verbs) def place(): file = open('place.txt', 'r', encoding = 'utf8') f = file.read() places = f.split('\n') return random.choice(places) def part_SS(): return adjective()+" " +noun()+" " +verb()+" "+place() def SS(): return (part_SS()+conjunction()+" "+part_SS()+".").capitalize() def IfSP(): return "в то время как " + adjective()+" " + noun() +" "+ verb() +" "+ place() + ", " + noun()+" "+verb() def TimeSP(): return "когда " + noun() +" "+ verb() + ", "+ adjective()+" "+ noun()+" "+verb() def SP(): ver = random.randint(1,2) if (ver == 1): return (IfSP()+".").capitalize() else: return (TimeSP()+".").capitalize() print("УДИВИТЕЛЬНЫЙ ШЕДЕВР НАПИСАННЫЙ МАШИНОЙ") for i in range(random.randint(5,10)): sen = random.randint(1,2) if(sen==1): print(SS()) else: print(SP()) print("НУ ВОТ И ВСЕ, РЕБЯТА") import re def search1(): with open('plant.html', 'r', encoding = 'utf-8') as f: found = [] article = f.read() f.close() result2 = re.findall(r'Семейство:\ <\/td>\n

\n(.*))' link = re.search(reg, content(name)) link = ((re.search(('title="(.*)"'),link.group())).group()).strip('title="') print("Отряд", link) with open (r'C:\Users\Анна\Documents\GitHub\prog\PythonHW11\lingva.html', 'r', encoding = 'utf-8') as f: content = f.read() import re article = re.sub(u'язык((а(х|ми?)?|у|о(м|в)|и|е)?[\s.,— ''""<>?!»():-;])', 'шашлык\\1', content) article2 = re.sub(u'Язык((а(х|ми?)?|у|о(м|в)|и|е)?[\s.,— ''""<>?»!():-;])', 'Шашлык\\1', article) with open ('new.txt', 'w', encoding='utf-8') as f: f.write(article2) def count_tf(word, text): return text.count(word) / len(text) def count_df(word, texts): n = [1 for text in texts if word in text] return sum(n) def count_idf(word, texts): n = len(texts) / (1 + count_df(word, texts)) return n from math import log def count_tfidf(word, text, texts): tf = count_tf(word, text) idf = count_idf(word, texts) return log(tf, 10) * log(idf, 10) import re punct = '[.,!«»?&@"$\[\]\(\):;% def preprocessing(text): text_wo_punct = re.sub(punct, '', text.lower()) words = text_wo_punct.strip().split() return words import os texts_dic = {} for root, dirs, files in os.walk('wikipedia'): for f in files[:50]: with open(os.path.join(root, f), 'r', encoding='utf-8') as t: text = preprocessing(t.read()) texts_dic[f.split('.')[0]] = text texts = list(texts_dic.values()) for text in texts_dic: for word in texts_dic[text]: scores = {} scores[word] = count_tf(word, texts_dic[text]) if scores[word] >= 55: texts_dic[text].pop(word) for text in texts_dic: print("Top words in document {}".format(text)) scores = {} for word in texts_dic[text]: scores[word] = count_tfidf(word, texts_dic[text], texts) sorted_words = sorted(scores.items(), key=lambda x: x[1]) for word, score in sorted_words[:5]: print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5))) import os import re import codecs f2 = open('table.csv', 'w', encoding = 'utf-8') f2.write('Название текста' + ',' + 'Автор' + ',' 'Дата создания текста') for file in 
os.listdir('.'): if file.endswith('xhtml'): with codecs.open(file, 'r', 'Windows-1251') as f: a = f.read() f.close() r1 = re.search('(.*)', a) r2 = re.search('', a) r3 = re.search('', a) if r1 and r2 and r3: f2.write(r1.group(1) + ',' + r2.group(1) + ',' + r3.group(1)) f2.close() import os import re import codecs f1 = open('file_words.txt', 'w', encoding = 'utf-8') for file in os.listdir('.'): if file.endswith('xhtml'): f = codecs.open(file, 'r', 'Windows-1251') a = f.read() f.close() r = re.search('(.*)', a) if r: f1.write(r.group(1) + '\t' + str(len(re.findall('', a))) + '\n') print('1') f1.close() import os def func1(): freqdict = {} for root, dirs, files in os.walk('.'): for d in dirs: if d[0] in freqdict: freqdict[d[0]] += 1 else: freqdict[d[0]] = 1 return freqdict def func2(freqdict): x = 0 for i in freqdict: if freqdict[i] > x: x = freqdict[i] a = i print ('название большинства папок начинается на ' + a) return func2(func1()) import random def length(string): s1 = '' i = 0 for i in range(len(string)): s1 += '.' return s1 def create_arr_and_dic(): f = open("Слова и подсказки.csv", encoding = "utf-8") a = f.readlines() arr = [] dic = {} for line in a: words = line.split(';') x = words[0].strip('\ufeff') arr.append(x) dic[x] = words[1].strip('\n') return arr, dic array, dictionary = create_arr_and_dic() y = random.choice(array) print('Вот ваша подсказка:', y, length(y)) z = input('Загаданное слово: ') if z == dictionary[y]: print('Правильно.') else: print('Увы, нет:(') import re def func1(): d = {} f = open('тестовый файл.txt', 'r', encoding = 'utf-8') a = f.readlines() for line in a: line = re.sub('(\.\.?\.?|\?|!)(\n)? 
?', '.', line) sentences = line.split('.') for sentence in sentences: if len(sentence) >= 1: d[sentence] = {word.strip(): len(word.strip())\ for word in sentence.split(' ')} return d print(func1()) word=input('Введите слово ') anotherword='' sameword=word print(word) for i in range(len(word)-1): anotherword=word[len(word)-i-1] for k in range(len(sameword)-1): anotherword+=sameword[k] print(anotherword) sameword=anotherword f=open('text.txt', encoding="utf-8") a=f.readlines() z=0 x=0 m=0 n=0 for line in a: words=line.split() for z in range(len(words)): if words[z].endswith('.') or words[z].endswith(','): x+=1 m+=x x=0 n+=len(words) print ((n-m)/n*100, '% слов в этом тексте не оканчиваются точкой или запятой') import random def actor3(): slova=[] f=open('actor3.txt', encoding="utf-8") a=f.readlines() z=0 for line in a: words=line.split() for z in range(len(words)): slova.append(words[z]) z=0 slovo=random.choice(slova) slovo=slovo.capitalize() return slovo def adj2(): slova=[] f=open('adj2.txt', encoding="utf-8") a=f.readlines() z=0 for line in a: words=line.split() for z in range(len(words)): slova.append(words[z]) z=0 return random.choice(slova) def line1(noun, adjective): return noun + ' ' + adjective def adverb2(): slova=[] f=open('adverb2.txt', encoding="utf-8") a=f.readlines() z=0 for line in a: words=line.split() for z in range(len(words)): slova.append(words[z]) z=0 slovo=random.choice(slova) slovo=slovo.capitalize() return slovo def verb2(): slova=[] f=open('verb2.txt', encoding="utf-8") a=f.readlines() z=0 for line in a: words=line.split() for z in range(len(words)): slova.append(words[z]) z=0 return random.choice(slova) def place2(): slova=[] f=open('place2.txt', encoding="utf-8") a=f.readlines() z=0 for line in a: words=line.split() for z in range(len(words)): slova.append(words[z]) z=0 return random.choice(slova) def line2(adverb, verb, place): return adverb + ' ' + verb + ' di ' + place + '.' 
def actor2(): slova=[] f=open('actor2.txt', encoding="utf-8") a=f.readlines() z=0 for line in a: words=line.split() for z in range(len(words)): slova.append(words[z]) z=0 slovo=random.choice(slova) slovo=slovo.capitalize() return slovo def verb3(): slova=[] f=open('verb3.txt', encoding="utf-8") a=f.readlines() z=0 for line in a: words=line.split() for z in range(len(words)): slova.append(words[z]) z=0 return random.choice(slova) def line3(noun, verb): return noun + ' ' + verb + '.' def randomhaiku(): haiku = line1(actor3(), adj2()) +\ '\n' + line2(adverb2(), verb2(), place2()) +\ '\n' + line3(actor2(), verb3()) return haiku print (randomhaiku()) arr=[] s=input('Ввведите латинское слово ') if len(s)!=0: arr.append(s) while len(s)!=0: s=input('Ввведите латинское слово ') if s.endswith ('re') or s.endswith ('i')or s.endswith ('isse') \ or s.endswith ('us esse') or s.endswith ('a esse') or s.endswith ('um esse') \ or s.endswith ('um iri'): arr.append(s) for i in range (len(arr)): print (arr[i]) N=int(input('Введите число ')) x=1 while x!=N: word=input('Введите слово ') x+=1 if word=='программирование': break print('Работа программы завершена') import re def func1(): arr = [] i = 0 f = open("Текст с глаголом выпить.txt", encoding = "utf-8") a = f.readlines() for line in a: words = line.split() for i in range(len(words)): words[i] = words[i].lower() arr.append(words[i].strip('.,!?/\|()";:')) f.close() return arr arr1 = [] i = 0 for i in range(len(func1())): if re.search('вып((ей(те)?)|(ь(е((шь)|м|те?)|ют?))|(и((л(а|о|и)?)|(т(ь?|(ы(й|ми?|х|е))\ |(ая?)|(о(е|(го)|й|му?)?)|(ую))))|в(ш((ая)|(ую)|и(й|ми?|х)|е(е|ю|му?)))?))', func1()[i]): if func1()[i] not in arr1: arr1.append(func1()[i]) print(func1()[i]) a=input('Введите число а') b=input('Введите число b') c=input('Введите число c') a=int(a) b=int(b) c=int(c) if a+b==c: print ('a и b в сумме дают c') else: print ('a и b в сумме НЕ дают c') if a/b==c: print ('a разделить на b равно c') else: print ('a разделить на b НЕ равно 
c') def func1(text_file): ed = 0 y = 0 i = 0 f = open(text_file, encoding = "utf-8") a = f.readlines() for line in a: words = line.split() for i in range(len(words)): if words[i].endswith('ed'): ed += 1 if words[i].endswith('ied'): y += 1 arr = [] arr.append(ed) arr.append(y) return arr a = input('Введите название файла, который хотите открыть: ') print('Количество форм на -ed в тексте: ', func1(a)[0], \ '\nИз них образованы от глаголов на -y: ', func1(a)[1]) import re def func1(): f = open("Ферма, Пьер — Википедия.html", encoding = "utf-8") a = f.readlines() i = 0 for i in range(len(a)): r1 = re.search("", a[i]) r2 = re.search("
.+<\/a><\/td>', article) print(result2) search1() my_file = open('corp.txt', 'r', encoding = 'UTF-8') text = my_file.read() arr = text.split('\n') my_file.close() new_file = open('new.txt', 'w', encoding = 'UTF-8') quant = str(len(arr)) new_file.write(quant) new_file.close() import re my_file = open('corp.txt', encoding = 'UTF-8') text = my_file.read() arr = text.split() my_file.close() a = [] b = [] un = [] count = 0 for i in range(len(arr)): if 'type' in arr[i] and 'lemma' in arr[i + 1]: item = arr[i] left_num = item.find('"') right_num = item.rfind('"') a.append(item[left_num + 1 : right_num]) for i in range(len(a)): for j in range(len(a)): if a[i] == a[j]: count += 1 if a[i] not in un: un.append(a[i]) b.append(count) count = 0 my_dictionary = dict(zip(un, b)) print(my_dictionary) new_file = open(new.txt, 'w', encoding = 'UTF-8') new_file.write(my_dictionary) new_file.close() def openfile(name): my_file = open(name, encoding = 'UTF-8') s = my_file.read() name_list = s.split() my_file.close() return name_list def un_check(arr = []): count = 0 for item in arr: if item.startswith('un') or item.startswith('Un'): count += 1 return count def length_check(cyph, arr = []): count = 0 for item in arr: if len(item) > cyph: count += 1 res = (count / len(arr)) * 100 return res def starter(): file_name = input() un_number = un_check(openfile(file_name)) crit = int(input()) proc = length_check(crit, openfile(file_name)) print (un_number, proc) starter() import re my_file = open('rask.txt', encoding = 'UTF-8') text = my_file.read() my_file.close() regex = '[!?]' new_text = re.sub(regex, '.', text) sent = [item.split() for item in new_text.split('.')] for words in sent: unique = set() for word in words: word.strip('. 
, : -') if words.count(word) > 1 and word not in unique: unique.add(word) print('{:^10} {:^10}'.format(word, words.count(word))) import os def openfile(file): with open(file, encoding = 'UTF-8') as my_file: text = my_file.read() return set(text.split()) def check(name): flag1, flag2 = False, False for item in name: if item in openfile('cyr.txt'): flag1 = True elif item in openfile('lat.txt'): flag2 = True if flag1 and flag2: return True else: return False names = set() count = 0 for f in os.listdir(): if check(f): names.add(f) if os.path.isdir(f): count += 1 print('Найдено таких папок: ', count, '\n' 'Файлы и папки такого рода: ', *names) from __future__ import print_function import io word = ' союз ' with io.open('freq.txt', encoding='utf-8') as file: for line in file: if word in line1: print(line, end='') from __future__ import print_function import io word = ' жен ' with io.open('freq.txt', encoding='utf-8') as file: for line in file: if word in line: stop = ' ' i = 0 tail = len(line) ipm = 0 while line[i] != stop: print(line[i], end = ' ') i += 1 given_word = input('Введите, пожалуйста, любое слово: ') while len(given_word) > 0: print(given_word) given_word = given_word[1:len(given_word) - 1] import random def subj(): subj_file = open('nouns.txt', encoding = 'UTF-8') s = subj_file.read() subj_list = s.split() return random.choice(subj_list) subj_file.close def act(): act_file = open('activities.txt', encoding = 'UTF-8') s = act_file.read() act_list = s.split() return random.choice(act_list) act_file.close() def sylls_counter(word): vowels = open('vowels.txt', encoding = 'UTF-8') s = vowels.read() mark = s.split() vowels.close() count = 0 for item in word: if item in mark: count += 1 return count def obj(): obj_file = open('objects.txt', encoding = 'UTF-8') s = obj_file.read() obj_list = s.split() return random.choice(obj_list) obj_file.close() def puncmark(): punc_file = open('punctuation.txt', encoding = 'UTF-8') s = punc_file.read() punc_list = s.split() 
return random.choice(punc_list) punc_file.close() def composer(): line = (subj().capitalize() + ' ' + act() + ' ' + obj()) + puncmark() return line def check7(w7): if sylls_counter(w7) == 7: return 1 else: return 0 def check5(w5): if sylls_counter(w5) == 5: return 1 else: return 0 def final_tanka(): for i in range(5): a = composer() if i in [0, 2]: c = 0 while c != 1: a = composer() c = check5(a) print(a) else: d = 0 while d != 1: a = composer() d = check7(a) print(a) final_tanka() print("Введите, пожалуйста,три любых числа") a = int(input()) b = int(input()) c = int(input()) if a/b == c: print("Частное от деления числа a на число b равно числу c") else: print("Частное от деления числа a на число b не равно числу c") if a**b == c: print("Число a в степени b равно числу с") else: print("Число a в степени b не равно числу c") print("Введите, пожалуйста,три любых числа") a = int(input()) b = int(input()) c = int(input()) if a+b == c: print("Сумма чисел a и b равна числу c") else: print("Сумма чисел a и b не равна числу c") if a*b == c: print("Произведение чисел a и b равно числу c") else: print("Произведение чисел a и b не равно числу c") if a%b == c: print ("Число a даёт остаток, равный числу c при делении на число b") else: print ("Число a не даёт остаток, равный числу c при делении на число b") if -1*b/a == c: print("Число c является решением линейного уравнения ax+b") else: print("Число c не является решением линейного уравнения ax+b") if a/b == c: print("Частное от деления числа a на число b равно числу c") else: print("Частное от деления числа a на число b не равно числу c") if a**b == c: print("Число a в степени b равно числу с") else: print("Число a в степени b не равно числу c") import re t_file = open('Finnish.html', encoding = 'UTF-8') lines = t_file.read() t_file.close() clearing = re.compile('<[/]?[a-z]*>') lines = re.sub(u'<.*?>', u'', lines, flags = re.U) p1 = re.compile('ISO 639-3\D', re.IGNORECASE) p2 = re.compile('\n[a-z]{3}', re.IGNORECASE) m1 = 
p1.search(lines) m2 = p2.search(lines[m1.end():len(lines)]) print(m1.group(), m2.group()) import os def file_counter(): Max = 0 count = 0 for root, dirs, files in os.walk('.'): for item in root: if not item.startswith('.'): count = len(files) if count > Max: name = os.path.join(root) Max = count count = 0 print('Максимальное количество файлов -', str(Max) + ',', 'содержится в папке ', name) file_counter() import re my_file = open('Birds.html', 'r', encoding = 'UTF-8') text = my_file.read() my_file.close() text = re.sub(u'<.*?>&a-zA-Z', u'', text, flags = re.U) arr = text.split() var1 = re.compile('Птиц(а|ы|у|е|ам|ах|ами)') var2 = re.compile('[^\w]птиц(а|ы|у|е|ам|ах|ами)') var3 = re.compile('Птицей') var4 = re.compile('[^\w]птицей') s = '' for item in arr: if re.search(var1, item): item = re.sub(u'Птиц', u'Рыб', item, flags = re.U) elif re.search(var2, item): item = re.sub(u'птиц', u'рыб', item, flags = re.U) elif re.search(var3, item): item = re.sub(u'Птицей', u'Рыбой', item, flags = re.U) elif re.search(var4, item): item = re.sub(u'птицей', u'рыбой', item, flags = re.U) for item in arr: s += ' ' + item + ' ' new_file = open('Forms.txt', 'w', encoding = 'UTF-8') new_file.write(s) import random def gettingfile(): t=[] with open ('словник.txt','r',encoding='utf-8') as f: t=f.readlines() return t def getcomponent(tag): a=[] for i in gettingfile(): if tag in i: a.append(i[:i.find(tag)]) return random.choice(a) def getline(tags): s=[] for tag in tags: s.append(getcomponent(tag)) capital=s[0][0] if not capital.isupper(): capital=capital.upper() s[0]=capital+s[0][1:] line=''.join(s) return line[:len(line)-1] def getprep(line,iter): if 'Зачем' in line: return '?' elif 'Как так' in line: return '?!' elif iter==3: return '.' 
else: return ',' def main(): wordtags=[['N.M.anim','V.TR-M','N.-ACC','N-INSTR'],['CLIT1','N-GEN','ADV','N1','V.INTR'],\ ['ADRB1','N.M.anim','V.INTR','ADJ-M'],['CLIT2','ADV','V-INF','ADRB2','N.-ACC']] for c in range(4): l=getline(random.choice(wordtags)) print(l+getprep(l,c)) input() return if __name__=='__main__': main() import re import os import shutil def printfilenames(path): printed = [] counter = 0 for i in os.listdir(path): if os.path.isfile(i): a = i[:i.rfind('.')] if a not in printed: print(i) printed.append(a) if not re.search('[0-9]',a): counter += 1 else: if i not in printed: print(i) printed.append(i) print('Количество файлов, не содержащих цифры в названии = ', counter) return def main(): printfilenames('.') if __name__ == '__main__': main() import random def getdictionary(a): with open(a,'r',encoding='utf-8') as f: x=f.readlines() d=dict() for line in x: line=[i for i in line.split(',')] d[line[0]]=line[1:] return d def game(wordlist): keys=[i for i in wordlist.keys()] word=random.choice(keys) t=True points=''.join(['.']*len(word)) while t: print(random.choice(wordlist[word])+' '+points+' ?') ans=input().lower() if ans==word: print('Правильно !') t=False else: print('Неправильно. 
Ещё одну попытку ?') r=input().lower() while r!='да': if r=='нет': return else: print('Неверный ввод, введите "да" или "нет" ') r=input().lower() return def main(): print('Сыграем в игру ?') a=input().lower() while a!='нет': if a.lower()=='да': game(getdictionary('Слова и подсказки.csv')) print('Cыграем ещё раз ?') a=input().lower() continue elif a!='нет': print('Неверный ввод, введите "да" или "нет" ') a=input().lower() print('До свидания !') if __name__=='__main__': main() s=input() for i in range(1,len(s)+1): print(s[:i]) import re def getsentences (filename): try: with open (filename, 'r', encoding = 'utf-8') as f: text = f.read() if text: smark = re.compile('\w.*?[.…!?\n]') sentences = re.findall(smark, text) if sentences: sentences = [punctcut(i) for i in sentences] sentences = [i for i in sentences if i] else: print('Предложений не найдено') return sentences else: print ('Выбран пустой текстовый файл') return False except UnicodeDecodeError: print('Неверная кодировка! Нужен файл в utf-8') def punctcut(s): words = re.findall('\w*-?\w*',s) if words: words = [i for i in words if i] return ' '.join(words) else: return '' def main(): upwordscheck = False sentences = getsentences(input('Введите имя файла: ')) if sentences: for sentence in sentences: if sentence.count(' ')>9: upwords=[i for i in sentence.split(' ') if i.istitle()] if upwords: upwordscheck = True print('{:^20}'.format(' '.join(upwords))) if not upwordscheck: print('Слов с большой буквы в предложениях длиннее 10 слов не найдено') if __name__ == '__main__': main() with open(input(),'r+', encoding='utf-8') as f: allines=f.readlines() minlen=len(allines[0]) maxlen=0 for i in allines: if i and i!='\n': if len(i)>maxlen: maxlen=len(i) if len(i)5: print(i) import re def getwords(filename): with open (filename,'r',encoding='utf-8') as t: text=t.readlines() words=set() for line in text: if line and line!='\n': lw=[i.strip(',.!()[]{};:""?<>-\n') for i in line.split(' ')] for w in lw: if w: 
words.add(w.lower()) return words def main(): f=re.compile('на(ш(л[аои]|е(л|дший?)|ёл)|й(ти|д(и(те)?|ут?|[её](нн(ый|ое|ая)|шь|те?|м)|я)))') words=getwords(input('Введите имя файла: ')) forms=set() for word in words: if f.match(word)!=None: forms.add(word) for i in forms: print(i) if __name__=='__main__': main() import re def getxt(): with open(input('Введите имя входного файла: '),'r',encoding='utf-8') as f: text=f.read() return text def printinf(s): with open(input('Введите имя выходного файла: '),'w',encoding='utf-8') as f: f.write(s) return def main(): a=r'[\s\S]*?<.*?>Часовой пояс([\s\S]*?)' b=r'(<.*?>)([^<>]*)' timezone=re.search(a,getxt()).group(1) onlywords=re.findall(b,timezone) timezone='' for i in onlywords: timezone+=i[1] printinf(timezone) if __name__=='__main__': main() def getwords(filename): with open (filename,'r',encoding='utf-8') as f: text=f.read() words=[i.strip(',!.";:()-«»') for i in text.split(' ')] return words def wordswithending(words,a): m=[] c=0 for word in words: if len(word)>len(a): if word[len(word)-len(a):]==a: if not word in m: c+=1 m.append(word) print ('Количество разных слов с окончанием -'+a+' в тексте: ',c) return m def maxwordfrequency(m): freqs=[] for i in range(len(m)): freqs.append(0) for word in m: if m[i]==word: freqs[i]+=1 maxind=0 for i in range(len(freqs)): if freqs[i]>maxind: maxind=i return m[maxind] def main(): x=maxwordfrequency(wordswithending(getwords(input('Введите имя файла: ')),'ness')) print('Слово с максимальной частотой: ',x) if __name__=='__main__': main() import os import re def savedict(d, filename): with open(filename,'w', encoding='utf-8') as f: for key in d: line = key + '\t' + str(d[key])+'\n' f.write(line) return def savearray(ar, filename): with open(filename,'w', encoding='utf-8') as f: for el in ar: line = '\t'.join(el)+'\n' f.write(line) return def findbigrams(t): bigrams = [] sentences = re.findall('[\w\W]+?', t) for sentence in sentences: bigram = '' raw_sentence = re.sub('<.*?>','',sentence) 
raw_sentence = re.sub('\n','',raw_sentence) words = re.findall('[\w\W]+?', sentence) for i in range(len(words)): if ('gr="PR' in words[i]) and (i','',words[i])+' '+re.sub('<.*?>','',words[i+1]) if bigram: bigrams.append([bigram,raw_sentence]) return bigrams def task1(): d = dict() path = '.\\news' for root, dirs, files in os.walk(path): for file in files: with open (os.path.join(root,file), 'r', encoding = 'cp1251') as f: txt = f.read() d[file] = txt.count('') savedict(d, 'sentencesnumber.csv') def task2(): lines = [] lines.append(["Название файла", "Автор", "Тематика текста"]) for root, dirs, files in os.walk(path): for file in files: nm = file with open (os.path.join(root,file), 'r', encoding = 'cp1251') as f: txt = f.read() author = re.search(r'',txt).group(1) topic = re.search(r'',txt).group(1) lines.append([nm, author, topic]) savearray(lines, 'authors_and_topics.csv') def task3(): corpus = '' for root, dirs, files in os.walk(path): for file in files: with open (os.path.join(root,file), 'r', encoding = 'cp1251') as f: txt = f.read() corpus += txt bigrams = findbigrams(corpus) savearray(bigrams, 'bigrams.csv') def main(): task1() task2() task3() if __name__ == '__main__': path = '.\\news' main() word='' while word=='': print('Введите 1 русское слово(без пробелов и знаков препинания):') word=input() if word: for i in word: if ord(i)<128 or 175241: print('Слово может содержать только кириллические буквы без пробелов и др. 
знаков') word='' break if word=='': continue for index,elem in enumerate(word): if index%2==0 and (elem=='о' or elem=='п' or elem=='е'): print(elem,end=' ') else: print('Нужно ввести слово') print('Введите число a:') a=int(input()) print('Введите число b:') b=int(input()) print('Введите число c:') c=int(input()) d='не' e='не' if a*b==c: d='' if a*c+b==0: e='' print('произведение ',a,'и ',b,d,'равно ',c) print(c,e,'является корнем уравнения ',a,'x+',b,'=0') import re def task1(filename): with open (filename, 'r', encoding='utf-8') as f: l=len(f.readlines()) with open ('Stringnumber.txt', 'w', encoding='utf-8') as t: t.write('The number of strings is '+str(l)) return def task2(filename): d=dict() with open(filename,'r',encoding='utf-8') as f: text=f.read() t=re.compile(r'(.*?)') words=re.findall(w,text) with open ('f.csv','w',encoding='utf-8') as f: for i in words: f.write(','.join(i)+'\n') return def task3(filename): d=dict() with open(filename,'r',encoding='utf-8') as f: text=f.read() t=re.compile(r' mco: mco = c if not c == 0: dct[root.split('\\')[len(root.split('\\'))-1]] = c return dct, mco def find_right_one(dct,mco): ar = [] for key in dct: if dct[key] == mco: ar.append(key) return ar def main(): ar = find_right_one(find_folder()[0],find_folder()[1]) print("Папка(-и), где больше всего файлов: " + " | ".join(ar)) if __name__ == "__main__": main() def getwords(): f = open("hemingway.txt",'r') ar = f.read().split() f.close() for i,word in enumerate(ar): ar[i] = word.strip("!?.”,:;’\'\"-—“").lower() return(ar) def un(ar): arr = [] for word in ar: if word.startswith('un'): arr.append(word) return(arr) def verify(ar,inp): amount = 0 for word in ar: if len(word)>inp: amount += 1 return str(amount/len(ar)*100)+"%"+" of words are longer than " + str(inp) + " letters." def main(): while True: inp = int(input("Enter a number. Enter 0 to close the program. 
")) if inp > 0: print(verify(un(getwords()),inp)) else: print("Bye!") break if __name__ == "__main__": main() import random def open_file(): f = open("d.csv","r") ar = [] for line in f.readlines(): ar.append(line.split()) f.close() return ar def create_d(ar): d = {} for line in ar: if line[0] == "inv": d[line[2]] = line[1] + "*" elif line[0] == "n": d[line[1]] = line[2] return d def verify(word): inp = input("Угадайте слово: ") if inp == word: print(congr(True)) else: print(congr(False)) def congr(sw): pos = ["Вы угадали!","Ура, Вы угадали!","Угадали...","Наконец-то! Вы угадали!","Неужели вы -- угадали?.."] neg = ["Не угадали, попробуйте еще.","Неправильно.","Увы -- неправильно...","Вовсе нет, пробуйте еще."] if sw: return random.choice(pos) else: return random.choice(neg) def guess(d): key = random.choice(list(d.keys())) ar = [key,d[key]] if ar[1].endswith("*"): print(ar[1].strip("*") + "...") verify(ar[0]) else: print("..." + ar[1]) verify(ar[0]) def main(): while True: guess(create_d(open_file())) if input("Хотите продолжить? Сделайте пустой ввод, если хотите. 
Если нет -- введите что-нибудь: ") != "": print("До свидания!") break if __name__ == "__main__": main() import re def getar(): f = open("download-excel.xml","r",encoding = "utf8") ar = f.read() f.close() return ar def count_lines(ar): f = open("out1.txt","w",encoding="utf8") f.write(str(len(ar.split("\n")))) f.close() def dct_morph(ar): arr = re.findall("(?:.*?)",ar) dct = {} for key in arr: if key not in dct: dct[key] = 1 else: dct[key] += 1 return dct def dct_morph_out(dct): f = open("out2.txt","w",encoding="utf8") st = "" for key in dct: st = st + str(key) + "\n" f.write(st) f.close() def find_adj(text): arr = re.findall("(?:.*?)",text) dct = {} for key in arr: if key not in dct: dct[key] = 1 else: dct[key] += 1 return dct def find_adj_out(dct): f = open("out3.txt","w",encoding="utf8") st = "" for key in dct: st = st + str(key) + " " + str(dct[key]) + "\n" f.write(st) f.close() def look_better(text): arr = re.findall("(.*?)",text) st = "" for el in arr: st = st + el[0] + "," + el[1] + "," + el[2] + "\n" text1 = re.sub("(.|\n)*","\n"+st+"\n",text) f = open("out4.csv","w",encoding="utf8") f.write(text1) f.close() def main(): while True: st = input('Введите 1 для 1 п., 2 для 2 п., 3 для 3 п. 
задания, любой другой символ для выхода:') if st == "1": count_lines(getar()) elif st == "2": dct_morph_out(dct_morph(getar())) elif st == "3": find_adj_out(find_adj(getar())) look_better(getar()) else: print("До свидания.") break if __name__ == "__main__": main() import re def gettext(): f = open("1.html",'r',encoding="utf8") ar = f.read() f.close() return ar def findcode(st): reg = "(http://www-01\.sil\.org/iso639-3/documentation\.asp\?id=)([a-z][a-z][a-z])" matches = re.findall(reg,st) return matches[0][1] def main(): print("Код этого языка: "+findcode(gettext())) if __name__ == "__main__": main() import re def getar(): f = open("1.txt",'r',encoding="utf8") ar = f.read().split() f.close() return ar def normalize(ar): punct = "!?.,:;\'\"-—" arr = [] for word in ar: word = word.strip("!?.,:;\«»'\"…-—()][*1234567890").lower() if word != "": arr.append(word) return arr def findverb(ar): arr = [] for word in ar: if re.match("(сиде?(л|ть|в|я))|(сиж(у|ива))|(сиди(те)?)",word): arr.append(word) return arr def main(): print(" ".join(findverb(normalize(getar())))) if __name__ == "__main__": main() def getar(inp): f = open(inp + ".csv",'r',encoding="utf-8") a = f.read().split("\n") b = [] for el in a: b.append(el.split(";")) return b def add_feature(arX,arY): ar1 = arX ar2 = arY ar = [] arr = [] for line2 in ar2: for line1 in ar1: if line2[0] == line1[1]: ar.append("*".join(line1)+"*"+line2[1]) for line in ar: arr.append(line.split("*")) for i in range(0,100): print(arr[i]) return arr def getlines(ar): lines = [] lines1 = [] for i in range(0,len(ar[1])): lines.append("") for n in range(0,len(ar[1])): for line in ar: if not n in [4,5]: lines[n] += "\"{}\", ".format(line[n]) elif n in [4,5]: lines[n] += "{}, ".format(line[n]) for line in lines: lines1.append(line[:-2]) return lines1 def outp(ar): f = open("output.txt",'w+',encoding="utf-8") st = "locat <- data.frame(language = c({}), dialect = c({}), latitude = c({}), longitude = c({}), feature = 
c({}))".format(ar[1],ar[2],ar[3],ar[4],ar[5]) f.write(st) outp(getlines(add_feature(getar("csv-template"),getar("features")))) def getar(inp): f = open(inp + ".csv",'r',encoding="utf-8") a = f.read().split("\n") b = [] for el in a: if not el == "": b.append(el.split(";")) for el in b: for ell in el: if "\ufeff" in ell: print(ell) ell = ell.replace("\ufeff","") return b def add_feature(arX,arY): ar1 = arX ar2 = arY ar = [] arr = [] for l1 in ar1: for l2 in ar2: if l1[3] == l2[0]: ar.append("*".join(l1)+"*"+l2[1]) for line in ar: arr.append(line.split("*")) return arr def out2(ar): arr = [] for el in ar: arr.append(";".join(el)) st = "\n".join(arr) f = open("output2.csv","w+",encoding="utf-8") f.write(st) f.close() def getlines(ar): lines = [] lines1 = [] for i in range(0,len(ar[1])): lines.append("") for n in range(0,len(ar[1])): for line in ar: if not n in [4,5]: lines[n] += "\"{}\", ".format(line[n]) elif n in [4,5]: lines[n] += "{}, ".format(line[n]) for line in lines: lines1.append(line[:-2]) return lines1 def outp(ar): f = open("output.txt",'w+',encoding="utf-8") st = "locat <- data.frame(language = c({}), dialect = c({}), latitude = c({}), longitude = c({}), feature = c({}))".format(ar[1],ar[2],ar[3],ar[4],ar[5]) f.write(st) def main(): print("fawfwa") out2(add_feature(getar("allvillages"),getar("features"))) if __name__ == "__main__": main() import os import re def open_file(name): f = open(name,"r") txt = f.read() f.close() return re.findall(".*[.,;:?!]*", txt) def write_to_file(st,filename): f = open(filename,"w+",encoding = "utf-8") f.write(st) f.close() def count_words_infile(): st = "" for roots,dirs,files in os.walk("."): for file in files: if file.endswith(".xhtml"): st = st + file + "\t"+ str(len(open_file(os.path.join(roots,file)))) + "\n" write_to_file(st,"output_1.txt") def find_data(): fd = [] for roots,dirs,files in os.walk("."): for file in files: if file.endswith(".xhtml"): with open(os.path.join(roots,file)) as text: if 
file.endswith(".xhtml"): t = text.read() fd.append([file,"".join(re.findall("",t)),\ "".join(re.findall("",t))]) st = "Название файла;Автор;Год создания\n" for line in fd: st = st + ";".join(line) + "\n" write_to_file(st,"output_2.csv") def find_bigrams(): bigrams = [] for roots,dirs,files in os.walk("."): for file in files: if file.endswith(".xhtml"): ar = open_file(os.path.join(roots,file)) for i,word in enumerate(ar): if re.search("(.+)<",word).group(1)+" " +re.search("ana>(.+)<",ar[i+1]).group(1) bigrams.append(bigram) def cleanse(): text = "" for roots,dirs,files in os.walk("."): for file in files: if file.endswith(".xhtml"): ar = open_file(os.path.join(roots,file)) for word in ar: text = text + " " + "".join(list(re.findall("ana>(.+)<.*([.,;:?!])?",word))) write_to_file(text,"test_cleanse.txt") def main(): find_bigrams() find_data() count_words_infile() main() def main(): num = int(input("Enter a number ")) nump = 2 while num >= nump: print(nump) nump *= 2 main() def main(): st = input("Enter a string: ") print(st) while len(st) > 1: st = st[1:len(st)-1] print(st) main() import re def gettext(): f = open("1.txt",'r',encoding="utf8") ar = f.read() f.close() return ar def repl(text): s1 = re.sub("Птиц(|а|ы|у|е|ей|ам|ами|ах)","Рыб\\1",(re.sub("птиц(|а|ы|у|е|ей|ам|ами|ах)","рыб\\1",text))) return(s1) def savetext(text): f = open("2.txt","w",encoding="utf8") f.write(text) f.close() def main(): savetext(repl(gettext())) if __name__ == "__main__": main() def intver(): a = input("Enter the number:") while not a.isdigit(): a = input("You shouldn't enter any letters. 
Enter again:") a = int(a) return a a = intver() b = intver() c = intver() if a/b == c: print("a/b=c") else: print("a/b doesn't equal c") if a**b == c: print("a^b=c") else: print("a^b doesn't equal c") import re def gettext(): f = open("1.txt",'r',encoding="utf8") ar = f.read() f.close() return ar def splitting(s): s1 = re.sub("([А-Я]|род)\.","\\1&&&",s) s1 = re.sub("(\.|!|\?)","\\1 ar = s1.split(" for i in range(0,len(ar)): ar[i] = ar[i].replace("&&&",".") return ar print("\n".join(splitting(gettext()))) import random def file_op(name): f = open(name,'r') ar = f.read().split(' ') f.close() return(ar) def adverb(): ar = file_op("adverbs.txt") return random.choice(ar) + ' ' def int_clit(): ar = file_op("int_clitics.txt") return random.choice(ar) + ' ' def verb(): ar = file_op("verbs.txt") return random.choice(ar) + ' ' def name_fem(): ar = file_op("name_fem.txt") return random.choice(ar) + ' ' def adj_fem1(): ar = file_op("adj_fem1.txt") return random.choice(ar) + ' ' def adj_fem2(): ar = file_op("adj_fem2.txt") return random.choice(ar) + ' ' def prepositions1(): ar = file_op("prepositions1.txt") return random.choice(ar) + ' ' def names_prcaus(q): if q == "sg": ar = file_op("names_prcaus_sg.txt") return random.choice(ar) + ' ' elif q == "pl": ar = file_op("names_prcaus_pl.txt") return random.choice(ar) + ' ' def adj_prcaus(q): if q == "sg": ar = file_op("adj_prcaus_sg.txt") return random.choice(ar) + ' ' elif q == "pl": ar = file_op("adj_prcaus_pl.txt") return random.choice(ar) + ' ' def name_gen(): ar = file_op("name_gen.txt") return random.choice(ar) + ' ' def adj_gen(): ar = file_op("adj_gen.txt") return random.choice(ar) + ' ' def name_nom(): ar = file_op("name_nom.txt") return random.choice(ar) + ' ' def adj_nom(): ar = file_op("adj_nom.txt") return random.choice(ar) + ' ' def name_gen_sg(): ar = file_op("name_gen_sg.txt") return random.choice(ar) def punct(): ar = file_op("punct.txt") return random.choice(ar) def verse1(): return int_clit() + adverb() + verb() 
def verse2(): if random.randint(1,2) == 1: return(name_fem() + adj_fem2()) return(adj_fem1() + name_fem()) def verse3(): if random.randint(1,2) == 1: return "в " + names_prcaus("sg") + adj_prcaus("sg") return "в " + names_prcaus("pl") + adj_prcaus("pl") def verse4(): return prepositions1() + name_gen() + adj_gen() def verse5(): return adj_nom() + name_nom() + name_gen_sg() + punct() def main(): print("Ныне забытый поэт-символист В.К. в начале 20 в. написал цикл стихотворений танка; цикл посвящен В.Я. Брюсову. \nВ.К. \nПять танок.") for i in range(1,6): print(i) print(verse1().capitalize() + "\n" + verse2().capitalize() + "\n" + verse3().capitalize() + "\n" + verse4().capitalize() + "\n" + verse5().capitalize()) print("1908 г.") main() def task1(): f = open('freq.txt','r',encoding = "UTF-8") line = [] for i in f: line = i.split(' | ') if line[1] == 'союз': print(' | '.join(line)) f.close() def task2(): f = open('freq.txt','r',encoding = "UTF-8") line = [] nom = [] st = "" summ = 0 for i in f: line = i.split(' | ') nom = line[1].split() if nom[0] == "сущ" and len(nom)==5: if nom[2] == "ед" and nom[3] == "жен": st += line[0] + ", " summ += float(line[2]) f.close() print(st) print("Суммарный ipm: ", summ) def npt(): ary = [] while True: np = input("Enter a word").lower() if np == "": print("Конец ввода") break ary.append(np) return ary def task3(): ary = npt() f = open('freq.txt','r',encoding = "UTF-8") line = [] dictio = [] ver = False for i in f: dictio.append(i.split(" | ")) for word in ary: for i in range(len(dictio)): ver = False if word == dictio[i][0]: print("Для слова " + "\"" + word + "\"" + ": "+ dictio[i][1]+ " | "+ dictio[i][2]) ver = True break if not ver: print("Слова " + "\""+ word +"\" " + "в словаре нет.") f.close() def main(): while True: a = int(input("Введите номер задания: ")) if a == 1: task1() elif a==2: task2() elif a==3: task3() else: break if __name__ == '__main__': main() def main(): f = open('1.txt','r') summ = 0 per = 0 ary = [] for line in 
f: summ += 1 ary = line.split() i = -1 for word in ary: i += 1 if ary[i] in "-?!,.:;()\"'": ary.pop(i) if len(ary) > 5: per += 1 print(str(per/summ*100)+"%") f.close() if __name__ == '__main__': main() def npt(): ar = [] for i in range(8): ar.append(input("Enter a string: ")) return ar def main(): ar = npt() for i in range(0,7,2): print(ar[i]+ar[i+1]) main() import os def cyr_lat(cyr,lat,st): iscyr = False islat = False for letter in st: if not iscyr and letter in cyr: iscyr = True elif not islat and letter in lat: islat = True if iscyr and islat: return True def find_folders(): ar = os.listdir('.') c = 0 for sth in ar: if os.path.isdir(sth) and cyr_lat('абвгдеёжзийклмнопрстуфхцчшщъыьэюя','abcdefghijklmnopqrstuvwxyz',sth): c += 1 return c def nrep_list(): ar = os.listdir('.') dct = {} for el in ar: if os.path.isdir(el): dct[el]=1 else: for i in range(0,len(el)): if el[len(el)-1-i] == ".": dct[el[:len(el)-1-i]]=1 break print('\n'.join([key for key in dct])) def main(): print("{} пап(ка|ки|ок) с кириллическими и латинскими символами. 
\nНеповторяющиеся имена файлов/папок:".format(find_folders())) nrep_list() if __name__ == '__main__': main() import re def getar_sentences(): f = open("solj.txt",'r',encoding="utf-8") st = f.read() ar = [re.sub("\n"," ",re.sub("[,;\"}:-]",'',sentence).lower()) for sentence in re.split('[!?.]', st) if sentence != ''] f.close() return ar def freq(word,ar): count = 0 for w in ar: if w == word: count += 1 return count def count_rep(ar): arr = [sentar.split() for sentar in ar] for sentence in arr: d = {word:freq(word,sentence) for word in sentence} for key in d: if d[key] > 1: print('{}{:^20}'.format(key,d[key])) for sentence in arr: for word in sentence: if not word in d: d[word] = 1 else: d[word] += 1 def main(): count_rep(getar_sentences()) if __name__ == "__main__": count_rep(getar_sentences()) main() import random def words(file): f = open(file, "r", encoding = "utf(8)") f1 = f.read() arr = f1.split() f.close() return arr def noun(number): singular = words("sing_nouns.txt") plural = words("pl_nouns.txt") if number == 's': return random.choice(singular) return random.choice(plural) def noun2(): nouns = words("nouns2.txt") return random.choice(nouns) def punctuation(): marks = [".", "?", "!", "..."] return random.choice(marks) def verb(syllables): verbs2 = words("verbs2.txt") verbs3 = words("verbs3.txt") if syllables == 2: return random.choice(verbs2) return random.choice(verbs3) def noun_phrase(): clitics = words("clitics.txt") clitic = random.choice(clitics) noun1 = noun('s') return clitic + ' ' + noun1 def verse1(): return noun('s') + ' ' + verb(3) + punctuation() def verse2(): return noun('s') + ' ' + verb(2) + ' ' + noun('pl') + punctuation() def verse3(): return verb(3) + ' ' + noun('s') + punctuation() def verse4(): return noun_phrase() + ' ' + verb(2) + ' ' + noun2() + punctuation() def make_verse(syllables): if syllables == 5: verse = random.choice([1,3]) if verse == 1: return verse1() else: return verse3() else: verse = random.choice([2,4]) if verse == 2: 
return verse2() else: return verse4() print(make_verse(5)) print(make_verse(7)) print(make_verse(5)) print(make_verse(7)) print(make_verse(7)) import os def num_files(path): c = 0 for f in os.listdir(path): if os.path.isfile(os.path.join(path, f)): c += 1 return c def most_files(path): n = num_files(path) name = path for root, dirs, files in os.walk(path): for d in dirs: new = os.path.join(root, d) c = num_files(new) if c > n: n = c name = d return name print(most_files('.')) f = open("isl.xml", "r", encoding = "utf(8)") d = {} for line in f: if "" not in line: count +=1 line = f.readline() f.close() f1 = open("res.txt", "w", encoding = "utf(8)") f1.write(str(count)) f1.close() import re f = open("isl.xml", "r", encoding = "utf(8)") f1 = open("res2.txt", "w", encoding = "utf(8)") for line in f: m = re.search(r'type="f.h\w*">', line) if m != None: s = line[m.end():] n = re.search(r'\w*', s) if n != None: f1.write(n.group() + ', ') f1.close() f.close() f = open("isl.xml", "r", encoding = "utf(8)") f1 = f.read() a = re.search("", f1) b = re.search(r"\s*", f1) f2 = f1[a.end()+1:b.start()] dtags = re.sub(r'<.*?>', '', f2, flags = re.DOTALL) f.close() import random d = {} file = open("words.csv", "r") for line in file: line = line.strip('\n') arr = line.split(';') d[arr[0]] = arr[1] file.close() a = [] for key in d: a.append(key) word = random.choice(a) print("Подсказка:", word, "...") noun = input() if noun == d[word]: win = ["ура!", "вы отгадали", "победа"] print(random.choice(win)) else: lose = ["вы не отгадали", "вы проиграли", "попробуйте еще раз"] print(random.choice(lose)) import re import os def sentences(text): text1 = re.split('[.?!]', text) return text1 def lengths(folder): d = {} for f in os.listdir(folder): text = open(os.path.join(folder, f), 'r') s = text.read() text.close() m = re.sub(u'<.*?>', u'', s, flags = re.DOTALL) s1 = sentences(m) d[f] = len(s1) return d def write_lengths(d): f = open('text.txt', 'w') for key in d: f.write(key + 
'{:>6}'.format(d[key]) + '\n') f.close() folder = 'news' d = lengths(folder) write_lengths(d) line = input() j = len(line) - 1 k = 1 for i in range(len(line)//2): print(line[k:j:]) j = j-1 k = k+1 file_name = input("Введите путь к файлу: ") f = open(file_name, "r", encoding = "utf(8)") lines = 0 i = 0 for line in f: arr = line.split() if len(arr) > 5: i += 1 lines += 1 percentage = i/lines * 100 print(percentage, "% строк содержит больше 5 строк.") f.close() import re f = open("Птицы.html", "r", encoding = "utf(8)") f1 = f.read() f.close() m = re.sub(u"\\bптиц(?=(\\b|(ы|у|а(х|ми?)?|е)\\b))", "рыб", f1) k = re.sub(u"\\bПтиц(?=(\\b|(ы|у|а(х|ми?)?|е)\\b))", "Рыб", m) m = re.sub(u"\\bПтицей\\b", "Рыбой", k) k = re.sub(u"\\bптицей\\b", "рыбой", m) f = open("new.html", "w", encoding = "utf(8)") f.write(k) f.close import re rex = r"\bси(жу|д(я(т|щ(и(й|е|х|ми?)|е(го|му?|й|е)|ая|ую))?|и(шь|т(е|ся)?)?|е(ть|л(а|о(сь)?|и)?|в(ш(и(й|е|х|ми?)?|е(го|му?|й|ю|е)|ая|ую))?)))\b" file = input("Введите название файла: ") f = open(file, "r", encoding = "utf(8)") f1 = f.read() f1 = f1.lower() arr = f1.split() for i, word in enumerate(arr): arr[i] = word.strip('.,:;&!«»()/-') f.close() found = [] for word in arr: m = re.search(rex, word) if m != None: if word not in found: print(word) found.append(word) a = [] for i in range(8): new_element = input() a.append(new_element) for i in range(0, 8, 2): print(a[i], a[i+1], sep = '') import re def sentences(text): text1 = re.split('[.?!]', text) return text1 def del_punct(text): text1 = [re.sub('[^\w\s]', '', sentence) for sentence in text] return text1 def count(text): for sentence in text: words = sentence.split() arr = [] for word in words: cnt = 0 if word not in arr: arr.append(word) for w in words: if word == w: cnt += 1 if cnt > 1: print(word, '{:^30}'.format(cnt)) f = open('собачка.txt', 'r', encoding = 'utf(8)') f1 = f.read() f1 = f1.lower() s = sentences(f1) d = del_punct(s) count(d) a = int(input("Введите число a: ")) b = 
int(input("Введите число b: ")) c = int(input("Введите число c: ")) if a / b == c: print(a, "разделить на", b, "равно", c) else: print(a, "разделить на", b, "не равно", c) if a ** b == c: print(a, "в степени", b, "равно" , c) else: print(a, "в степени", b, "не равно", c) n = int(input("Введите число: ")) i = 0 while 2**i <= n: print(2**i) i+=1 import os import re def cyr_lat(path): names = os.listdir(path) count = 0 for name in names: path1 = os.path.join(path, name) if os.path.isdir(path1): if re.search('[a-zA-Z]', name) and re.search('[а-яА-Я]', name): count+=1 return count n = cyr_lat('.') print(os.listdir('.')) print(n) def words(): file = input("Название файла: ") f = open(file, "r", encoding = "utf(8)") f1 = f.read() f1 = f1.lower() arr = f1.split() for i, word in enumerate(arr): arr[i] = word.strip('.,:;!?"()') f.close() return arr def un(): arr = words() arr1 = [] for word in arr: if word[0] == "u" and word[1] == "n": arr1.append(word) return arr1 def percentage(): arr = un() un_number = len(arr) print(un_number, "слов с приставкой un") count = 0 length = int(input("Cлова длиннее, чем: ")) for word in arr: if len(word) > length: count += 1 percent = count/un_number * 100 return percent number = percentage() print(number, "%") import os def countfiles(path): dic = {} for root, dirs, files in os.walk('.'): dic[len(files)] = root return dic[sorted(list(dic.keys()))[len(list(dic.keys()))-1]] def main(): print(countfiles('.')) main() arr=[] for i in range(8): inpstr=input() arr.append(inpstr) for i in range(4): print(arr[i*2]+arr[i*2+1]) a={} total=0 with5=0 with open('intext.txt', 'r', encoding='utf-8') as f: lines = f.readlines() for line in lines: total=total+1 a = line.split(' ') if len(a)>5: with5=with5+1 print(with5/total*100) istr=input() for i in range(len(istr)//2+len(istr)%2): print(istr[i:len(istr)-i]) import re def getarray(filename): wordarr=[] with open(filename, 'r', encoding='utf-8') as f: lines = f.readlines() for line in lines: 
linewords=line.split() for word in linewords: wordarr.append(cleanword(word)) return wordarr def cleanword(word): word=word.lower() falsechars = [] for i in range(len(word)): if re.search("[a-яё]",word[i]) == None: falsechars.append(word[i]) for char in falsechars: word = word.replace(char,"") return word def searchforms(cleanedarray): for word in cleanedarray: if re.match("си(жу|ди(шь|м|те?)?|де(л[аои]?|в(ш(ая|е(му?|е|го|й)|ую|и(й|х|е|ми?)?))?|ть)|дя(т|щ(ая|е(му?|е|го|й)|ую|и(й|х|е|ми?)))?)\Z",word) != None: print(word) searchforms(getarray("gulag.txt")) with open('aphor.txt', 'r', encoding='utf-8') as f: lines = f.readlines() for line in lines: if len(line.split()) < 17: print (line) resstr="" aphors=0 with open('aphor.txt', 'r', encoding='utf-8') as f: lines = f.readlines() for line in lines: splitted = line.split() for word in splitted: if word[len(word)-1]=="." or word[len(word)-1]=="," or word[len(word)-1]=="?": word=word[:-1:] word=word.lower() if word=="ум": aphors=aphors+1 if resstr!="": resstr+=", " resstr+=splitted[len(splitted)-1] break print('Цитат, содержащих слово "ум": '+str(aphors)) print(resstr) print() print("Введите слова:") wordarr=[] while True: i=input() if i=="": break wordarr.append(i) with open('aphor.txt', 'r', encoding='utf-8') as f: lines = f.readlines() for inpword in wordarr: print(inpword+":") printed=False inpword=inpword.lower() for line in lines: splitted = line.split() for word in splitted: if word[len(word)-1]=="." 
or word[len(word)-1]=="," or word[len(word)-1]=="?": word=word[:-1:] word=word.lower() if word==inpword: printed=True print(line[:-1:]) break if not printed: print("Слово "+inpword+" в цитатах не найдено") print() import os import re def getfolders(): folder = os.listdir() dirsdict = {} dirs = 0 for entity in folder: if os.path.isdir(entity) and re.search(r'[a-zA-Z]',entity) and re.search(r'[а-яёА-ЯЁ]',entity): dirs += 1 if entity not in dirsdict: print(entity) print() print (dirs,'directories total found.') getfolders() print("Введите число: ") n=int(input()) i=1 while n>=i: print(i); i=i*2 def getarray(filename): wordarr=[] with open(filename, 'r', encoding='utf-8') as f: lines = f.readlines() for line in lines: linewords=line.split() for word in linewords: wordarr.append(cleanword(word)) return wordarr def cleanword(word): word=word.lower() falsechars = [] for i in range(len(word)): if ord(word[i]) < 97 or ord(word[i]) > 123: falsechars.append(word[i]) for char in falsechars: word = word.replace(char,"") return word def get_value_and_percentage(arrayname, minlength): unWords = 0 unWordsByLength = 0 for word in arrayname: if word[:2] == "un": unWords = unWords + 1 if len(word) > minlength: unWordsByLength = unWordsByLength + 1 print("Слов с приставкой un-: ",unWords) if unWords > 0: print("Процент слов с количеством символов больще ", minlength,": ",unWordsByLength/unWords*100) else: print("Процент слов с количеством символов больще ", minlength,": ",0) fpath="Austen Jane.txt" inplength = int(input("В искомых словах символов должно быть больше чем: ")) print("Анализируем файл ",fpath,"...") get_value_and_percentage(getarray(fpath), inplength) import random import re def getdict (filepath): dic = {} with open(filepath, 'r', encoding='utf-8') as csv: rows = csv.readlines() for row in rows: rowvals = re.split(';|,|\n',row.replace(' ','')) if len(rowvals) == 2: dic[rowvals[0]] = rowvals[1] else: continue return dic def orderresponses(correctfile,incorrectfile): 
responses = {} responses["Correct"] = [] with open(correctfile, 'r', encoding='utf-8') as responsefile: lineresponses = responsefile.readlines() for response in lineresponses: if len(response) > 1: responses["Correct"].append(response) responses["Incorrect"] = [] with open(incorrectfile, 'r', encoding='utf-8') as responsefile: lineresponses = responsefile.readlines() for response in lineresponses: if len(response) > 1: responses["Incorrect"].append(response) return responses def riddle (dictname,orderedresponses): words = list(dictname.values()) hints = list(dictname.keys()) currenthint = random.choice(hints) while True: option = input(currenthint+' ') if dictname[currenthint] == option: print(random.choice(orderedresponses["Correct"])) break else: print(random.choice(orderedresponses["Incorrect"])) riddle(getdict("in.csv"),orderresponses("correct.txt","incorrect.txt")) import re def cleanword(word): word=word.lower() falsechars = [] for i in range(len(word)): if re.search("[a-яё\-]",word[i]) == None: falsechars.append(word[i]) for char in falsechars: word = word.replace(char,"") if word == '-': word = '' return word def main(): with open ('profession.txt' , 'r', encoding='utf-8') as f: sentences = re.split(r'([.?!]|(\.\.\.)) ',re.sub(r'[\t\n]',' ',f.read())) for sentence in sentences: if not sentence: continue lexemes = [cleanword(word) for word in sentence.split() if cleanword(word)] dic = {lexeme: 0 for lexeme in lexemes} for lexeme in lexemes: dic[lexeme]=dic[lexeme]+1 outdic = {key: dic[key] for key in sorted(list(dic.keys())) if dic[key] > 1} template = '{} {:^10}' for wrd in sorted(list(outdic.keys())): print(template.format(wrd,outdic[wrd])) main() import re def main(): with open("birds.html",'r',encoding='utf-8') as file: f = file.read() f = re.sub(r'([^а-яё])птиц((у|ы|а(м?и?|х?)|е)?[^а-яё])',r'\1рыб\2',f) f = re.sub(r'([^а-яё])птицей([^а-яё])',r'\1рыбой\2',f) f = re.sub(r'([^а-яё])Птиц((у|ы|а(м?и?|х?)|е)?[^а-яё])',r'\1Рыб\2',f) f = 
re.sub(r'([^а-яё])Птицей([^а-яё])',r'\1Рыбой\2',f) with open("fishes.html",'w',encoding='utf-8') as outfile: outfile.write(f) main() import os import re def countwords(filepath): W=0 with open (filepath,'r') as infile: lines = infile.readlines() for line in lines: if line[:3]=="": W+=1 return W def printwordscount(fileslist): with open ('words_count.txt','w',encoding='utf-8') as outfile: for i in range(len(fileslist)): outfile.write(fileslist[i][1]+'\t'+str(countwords(fileslist[i][0]))+'\n') def makefileslist(folderpath): outlist=[] filenames=os.listdir(folderpath) for filename in filenames: outlist.append([folderpath+os.sep+filename,filename]) return outlist def getmeta(filepath): outstr="" with open (filepath,'r') as infile: lines = infile.readlines() for line in lines: if line[:5]=="',line): sentencesarr.append([]) if line[:3]=="": sentencesarr[len(sentencesarr)-1].append(line) for sentence in sentencesarr: sentencestr="" for line in sentence: sentencestr+=re.sub(r'<.?w>||\n|<.p>||| ','',line) sentencestr+=' ' buffarr=[] prevA=False for line in sentence: if re.search('A=',line) and re.search('gen',line) and not prevA: buffarr.append(re.search('(.*?)',line).group(1)) prevA=True elif re.search('S,',line) and re.search('gen',line) and prevA: buffarr.append(re.search('(.*?)',line).group(1)) bigramstxt.write(buffarr[0]+' '+buffarr[1]+'\t'+sentencestr+'\n') buffarr.pop() buffarr.pop() prevA=False else: prevA=False if len(buffarr)>0: buffarr.pop() def main(): flist=makefileslist('news') printwordscount(flist) makecsv(flist) makebigrams(flist) main() import random NEXTGENDER="" NEXTCASE="nom" NEEDSVERB=False ISANIM=False def sylls(word): global NEXTCASE global NEXTGENDER global NEEDSVERB global ISANIM outs=0 word=word.lower() for i in range(len(word)): if word[i]=='а' or word[i]=='е' or word[i]=='ё' or word[i]=='и' or word[i]=='о' or word[i]=='у' or word[i]=='ы' or word[i]=='э' or word[i]=='ю' or word[i]=='я': outs = outs + 1 return outs def 
verb(category,min_syllables,max_syllables): global NEXTCASE global NEXTGENDER global NEEDSVERB global ISANIM if category == "past_m": past_m = [] f = open("v_past_m.txt", 'r', encoding='utf-8') for word in f: past_m.append(word) NEXTGENDER="m" pick = random.choice(past_m)[:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = random.choice(past_m)[:-1] return pick if category == "past_n": past_n = [] f = open("v_past_n.txt", 'r', encoding='utf-8') for word in f: past_n.append(word) NEXTGENDER="n" pick = random.choice(past_n)[:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = random.choice(past_n)[:-1] return pick if category == "past_f": past_f = [] f = open("v_past_f.txt", 'r', encoding='utf-8') for word in f: past_f.append(word) NEXTGENDER="f" pick = random.choice(past_f)[:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = random.choice(past_f)[:-1] return pick else: present = [] f = open("v_praes_tran.txt", 'r', encoding='utf-8') for word in f: present.append(word) pick = random.choice(present)[:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = random.choice(present)[:-1] NEXTCASE="acc" return pick def bigram(gender,min_syllables,max_syllables): global NEXTCASE global NEXTGENDER global NEEDSVERB global ISANIM casearr=[] bigramarr=[] if gender == "m": f = open("v_abl_m.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("abl") f = open("v_acc_m.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("acc") f = open("v_dat_m.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("dat") f = open("v_gen_m.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("gen") elif gender == "f": f = open("v_abl_f.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("abl") f = open("v_acc_f.txt", 'r', encoding='utf-8') for word in f: 
bigramarr.append(word) casearr.append("acc") f = open("v_dat_f.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("dat") f = open("v_gen_f.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("gen") else: f = open("v_abl_n.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("abl") f = open("v_acc_n.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("acc") f = open("v_dat_n.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("dat") f = open("v_gen_n.txt", 'r', encoding='utf-8') for word in f: bigramarr.append(word) casearr.append("gen") pick = random.randint(0,len(bigramarr)-1) res = bigramarr[pick][:-1] while sylls(res) < min_syllables or sylls(res) > max_syllables: pick = random.randint(0,len(bigramarr)-1) res = bigramarr[pick][:-1] NEXTCASE=casearr[pick] return res def noun(case,gender,min_syllables,max_syllables): global NEXTCASE global NEXTGENDER global NEEDSVERB global ISANIM if case == "nom": nomnouns = [] nomgenders = [] c=0 f = open("nouns_f_nom.txt", 'r', encoding='utf-8') for word in f: nomnouns.append(word) nomgenders.append("f") if gender == "f": pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] return pick c = len(nomnouns) f = open("nouns_m_nom.txt", 'r', encoding='utf-8') for word in f: nomnouns.append(word) nomgenders.append("m") if gender == "m": pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] return pick c = len(nomnouns) f = open("nouns_n_nom.txt", 'r', encoding='utf-8') for word in f: nomnouns.append(word) nomgenders.append("n") if gender == "n": pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] while sylls(pick) < min_syllables or 
sylls(pick) > max_syllables: pick = nomnouns[random.randint(c,len(nomnouns)-1)][:-1] return pick else: s = random.randint(c,len(nomnouns)-1) pick = nomnouns[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(c,len(nomnouns)-1) pick = nomnouns[s][:-1] NEXTGENDER = nomgenders[s] return pick if case == "gen": gennouns = [] c=0 f = open("nouns_f_gen.txt", 'r', encoding='utf-8') for word in f: gennouns.append(word) if gender == "f": pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] return pick c = len(gennouns) f = open("nouns_m_gen.txt", 'r', encoding='utf-8') for word in f: gennouns.append(word) if gender == "m": pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] return pick c = len(gennouns) f = open("nouns_n_gen.txt", 'r', encoding='utf-8') for word in f: gennouns.append(word) if gender == "n": pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = gennouns[random.randint(c,len(gennouns)-1)][:-1] return pick else: pick = gennouns[random.randint(0,len(gennouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = gennouns[random.randint(0,len(gennouns)-1)][:-1] return pick if case == "dat": datnouns = [] c=0 f = open("nouns_f_dat.txt", 'r', encoding='utf-8') for word in f: datnouns.append(word) if gender == "f": pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] return pick c = len(datnouns) f = open("nouns_m_dat.txt", 'r', encoding='utf-8') for word in f: datnouns.append(word) if gender == "m": pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] 
while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] return pick c = len(datnouns) f = open("nouns_n_dat.txt", 'r', encoding='utf-8') for word in f: datnouns.append(word) if gender == "n": pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = datnouns[random.randint(c,len(datnouns)-1)][:-1] return pick else: pick = datnouns[random.randint(0,len(datnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = datnouns[random.randint(0,len(datnouns)-1)][:-1] return pick if case == "ins": insnouns = [] c=0 f = open("nouns_f_ins.txt", 'r', encoding='utf-8') for word in f: insnouns.append(word) if gender == "f": pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] return pick c = len(insnouns) f = open("nouns_m_ins.txt", 'r', encoding='utf-8') for word in f: insnouns.append(word) if gender == "m": pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] return pick c = len(insnouns) f = open("nouns_n_ins.txt", 'r', encoding='utf-8') for word in f: insnouns.append(word) if gender == "n": pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = insnouns[random.randint(c,len(insnouns)-1)][:-1] return pick else: pick = insnouns[random.randint(0,len(insnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = insnouns[random.randint(0,len(insnouns)-1)][:-1] return pick if case == "abl": ablnouns = [] c=0 f = open("nouns_f_abl.txt", 'r', encoding='utf-8') for word in f: ablnouns.append(word) if gender == "f": pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] 
while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] return pick c = len(ablnouns) f = open("nouns_m_abl.txt", 'r', encoding='utf-8') for word in f: ablnouns.append(word) if gender == "m": pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] return pick c = len(ablnouns) f = open("nouns_n_abl.txt", 'r', encoding='utf-8') for word in f: ablnouns.append(word) if gender == "n": pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = ablnouns[random.randint(c,len(ablnouns)-1)][:-1] return pick else: pick = ablnouns[random.randint(0,len(ablnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = ablnouns[random.randint(0,len(ablnouns)-1)][:-1] return pick else: accnouns = [] c=0 f = open("nouns_f_acc.txt", 'r', encoding='utf-8') for word in f: accnouns.append(word) if gender == "f": pick = accnouns[random.randint(c,len(accnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = accnouns[random.randint(c,len(accnouns)-1)][:-1] return pick c = len(accnouns) f = open("nouns_m_acc_anim.txt", 'r', encoding='utf-8') for word in f: accnouns.append(word) if gender == "m" and ISANIM: s = random.randint(c,len(accnouns)-1) pick = accnouns[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(c,len(accnouns)-1) pick = accnouns[s][:-1] return pick c = len(accnouns) f = open("nouns_m_acc_inan.txt", 'r', encoding='utf-8') for word in f: accnouns.append(word) if gender == "m": s = random.randint(c,len(accnouns)-1) pick = accnouns[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(c,len(accnouns)-1) pick = accnouns[s][:-1] return pick c = len(accnouns) f = open("nouns_n_acc.txt", 
'r', encoding='utf-8') for word in f: accnouns.append(word) if gender == "n": pick = accnouns[random.randint(c,len(accnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = accnouns[random.randint(c,len(accnouns)-1)][:-1] return pick else: pick = accnouns[random.randint(0,len(accnouns)-1)][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: pick = accnouns[random.randint(0,len(accnouns)-1)][:-1] return pick def adj(case,min_syllables,max_syllables): global NEXTCASE global NEXTGENDER global NEEDSVERB global ISANIM if case == "nom": nomarr = [] nomgender = [] f = open("adj_f_nom.txt", 'r', encoding='utf-8') for word in f: nomarr.append(word) nomgender.append("f") f = open("adj_m_nom.txt", 'r', encoding='utf-8') for word in f: nomarr.append(word) nomgender.append("m") f = open("adj_n_nom_acc.txt", 'r', encoding='utf-8') for word in f: nomarr.append(word) nomgender.append("n") s = random.randint(0,len(nomarr)-1) pick = nomarr[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(0,len(nomarr)-1) pick = nomarr[s][:-1] NEXTGENDER=nomgender[s] return pick if case == "gen": genarr = [] gengender = [] f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') for word in f: genarr.append(word) gengender.append("f") f = open("adj_mn_gen.txt", 'r', encoding='utf-8') for word in f: genarr.append(word) gengender.append("m") f = open("adj_mn_gen.txt", 'r', encoding='utf-8') for word in f: genarr.append(word) gengender.append("n") s = random.randint(0,len(genarr)-1) pick = genarr[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(0,len(genarr)-1) pick = genarr[s][:-1] NEXTGENDER=gengender[s] return pick if case == "dat": datarr = [] datgender = [] f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') for word in f: datarr.append(word) datgender.append("f") f = open("adj_mn_dat.txt", 'r', encoding='utf-8') for word in f: datarr.append(word) 
datgender.append("m") f = open("adj_mn_dat.txt", 'r', encoding='utf-8') for word in f: datarr.append(word) datgender.append("n") s = random.randint(0,len(datarr)-1) pick = datarr[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(0,len(datarr)-1) pick = datarr[s][:-1] NEXTGENDER=datgender[s] return pick if case == "ins": insarr = [] insgender = [] f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') for word in f: insarr.append(word) insgender.append("f") f = open("adj_mn_ins.txt", 'r', encoding='utf-8') for word in f: insarr.append(word) insgender.append("m") f = open("adj_mn_ins.txt", 'r', encoding='utf-8') for word in f: insarr.append(word) insgender.append("n") s = random.randint(0,len(insarr)-1) pick = insarr[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(0,len(insarr)-1) pick = insarr[s][:-1] NEXTGENDER=insgender[s] return pick if case == "abl": ablarr = [] ablgender = [] f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') for word in f: ablarr.append(word) ablgender.append("f") f = open("adj_mn_abl.txt", 'r', encoding='utf-8') for word in f: ablarr.append(word) ablgender.append("m") f = open("adj_mn_abl.txt", 'r', encoding='utf-8') for word in f: ablarr.append(word) ablgender.append("n") s = random.randint(0,len(ablarr)-1) pick = ablarr[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(0,len(ablarr)-1) pick = ablarr[s][:-1] NEXTGENDER=ablgender[s] return pick else: accarr = [] accgender = [] f = open("adj_f_acc.txt", 'r', encoding='utf-8') for word in f: accarr.append(word) accgender.append("f") if ISANIM: f = open("adj_m_acc_anim.txt", 'r', encoding='utf-8') for word in f: accarr.append(word) accgender.append("m") else: f = open("adj_m_acc_inan.txt", 'r', encoding='utf-8') for word in f: accarr.append(word) accgender.append("m") f = open("adj_n_nom_acc.txt", 'r', encoding='utf-8') for word in f: 
def adv():
    """Return one randomly chosen adverb from ``adv.txt``.

    The file is read as UTF-8 from the current working directory and every
    line is treated as one entry.

    Returns:
        The chosen line without its line terminator.

    Raises:
        FileNotFoundError: if ``adv.txt`` does not exist.
        IndexError: if the file contains no lines (raised by
            ``random.choice`` on an empty list).
    """
    # ``with`` closes the handle; the original left it open on every call.
    with open("adv.txt", 'r', encoding='utf-8') as f:
        # Strip only the newline. The original used ``[:-1]``, which removed
        # a real letter when the last line had no trailing newline.
        advs = [line.rstrip('\n') for line in f]
    return random.choice(advs)
def make_verse7():
    """Build one seven-syllable verse line.

    When the module-level ``NEEDSVERB`` flag is set, the line is produced by
    ``verbverse7()``. Otherwise one of ``verse7a``, ``verse7b`` and
    ``verse7c`` is chosen with equal probability.

    Returns:
        The generated line as a string.
    """
    if NEEDSVERB:
        # The original called verbverse7() but discarded its result and then
        # fell through to a random template, so the verb line was never
        # emitted. Return it instead.
        return verbverse7()
    return random.choice([verse7a, verse7b, verse7c])()


def make_verse5():
    """Build one five-syllable verse line.

    When the module-level ``NEEDSVERB`` flag is set, the line is produced by
    ``verbverse5()``. Otherwise one of ``verse5a``, ``verse5b`` and
    ``verse5c`` is chosen with equal probability.

    Returns:
        The generated line as a string.
    """
    if NEEDSVERB:
        # Same fix as in make_verse7: hand the verb line back to the caller.
        return verbverse5()
    return random.choice([verse5a, verse5b, verse5c])()
# Lower-case Cyrillic vowel letters counted by sylls().
_VOWELS = frozenset('аеёиоуыэюя')


def sylls(word):
    """Count the vowel letters in *word*.

    The string is lower-cased first, so upper-case vowels are counted too.
    Only the ten letters in ``_VOWELS`` are counted; every other character
    (consonants, spaces, punctuation) is ignored.

    Args:
        word: any string; may contain several words.

    Returns:
        The number of vowel letters as an int (0 for an empty string).
    """
    return sum(1 for letter in word.lower() if letter in _VOWELS)
def noun(case, gender, min_syllables, max_syllables):
    """Pick a random noun form for the given case and gender.

    Word lists are read as UTF-8, one form per line, from files named
    ``nouns_<gender>_<case>.txt``. The accusative masculine list is split
    into ``nouns_m_acc_anim.txt`` and ``nouns_m_acc_inan.txt``.

    Args:
        case: "nom", "gen", "dat", "ins" or "abl". Any other value is
            handled as accusative.
        gender: "f", "m" or "n" restricts the draw to that gender. Any
            other value (callers pass "indiff") draws from all genders.
        min_syllables: smallest accepted ``sylls()`` count, inclusive.
        max_syllables: largest accepted ``sylls()`` count, inclusive.

    Returns:
        The chosen word without its line terminator.

    Raises:
        ValueError: if no word in the selected lists fits the syllable
            range. The original retried forever in that situation.
        FileNotFoundError: if a required word list is missing.

    Side effects:
        For ``case == "nom"`` with an unrestricted gender, the module-level
        ``NEXTGENDER`` is set to the gender of the chosen word.
    """
    global NEXTGENDER

    def load(path, tag):
        # ``with`` closes the handle; strip only the newline so a final line
        # without a terminator keeps its last letter.
        with open(path, 'r', encoding='utf-8') as f:
            return [(line.rstrip('\n'), tag) for line in f]

    genders = ("f", "m", "n")
    if case in ("nom", "gen", "dat", "ins", "abl"):
        sources = [(g, "nouns_{}_{}.txt".format(g, case)) for g in genders]
    else:
        sources = [("f", "nouns_f_acc.txt"), ("n", "nouns_n_acc.txt")]
        if gender == "m":
            # A masculine request uses exactly one list, selected by the
            # module-level ISANIM flag.
            name = "nouns_m_acc_anim.txt" if ISANIM else "nouns_m_acc_inan.txt"
            sources.append(("m", name))
        else:
            sources.append(("m", "nouns_m_acc_anim.txt"))
            sources.append(("m", "nouns_m_acc_inan.txt"))

    restricted = gender in genders
    if restricted:
        sources = [(g, path) for g, path in sources if g == gender]

    pool = []
    for tag, path in sources:
        pool.extend(load(path, tag))

    # Filtering first gives the same uniform distribution as the original
    # draw-and-retry loop, but it cannot spin forever.
    fitting = [(word, tag) for word, tag in pool
               if min_syllables <= sylls(word) <= max_syllables]
    if not fitting:
        raise ValueError(
            "no noun for case {!r}, gender {!r} with {}..{} syllables".format(
                case, gender, min_syllables, max_syllables))

    word, tag = random.choice(fitting)
    if case == "nom" and not restricted:
        # The original drew this case from the neuter list only, because its
        # start index still pointed at the neuter section. Draw from all
        # genders and report the real one.
        NEXTGENDER = tag
    return word
pick = genarr[s][:-1] NEXTGENDER=gengender[s] return pick if case == "dat": datarr = [] datgender = [] f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') for word in f: datarr.append(word) datgender.append("f") f = open("adj_mn_dat.txt", 'r', encoding='utf-8') for word in f: datarr.append(word) datgender.append("m") f = open("adj_mn_dat.txt", 'r', encoding='utf-8') for word in f: datarr.append(word) datgender.append("n") s = random.randint(0,len(datarr)-1) pick = datarr[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(0,len(datarr)-1) pick = datarr[s][:-1] NEXTGENDER=datgender[s] return pick if case == "ins": insarr = [] insgender = [] f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') for word in f: insarr.append(word) insgender.append("f") f = open("adj_mn_ins.txt", 'r', encoding='utf-8') for word in f: insarr.append(word) insgender.append("m") f = open("adj_mn_ins.txt", 'r', encoding='utf-8') for word in f: insarr.append(word) insgender.append("n") s = random.randint(0,len(insarr)-1) pick = insarr[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(0,len(insarr)-1) pick = insarr[s][:-1] NEXTGENDER=insgender[s] return pick if case == "abl": ablarr = [] ablgender = [] f = open("adj_f_gen_dat_ins_abl.txt", 'r', encoding='utf-8') for word in f: ablarr.append(word) ablgender.append("f") f = open("adj_mn_abl.txt", 'r', encoding='utf-8') for word in f: ablarr.append(word) ablgender.append("m") f = open("adj_mn_abl.txt", 'r', encoding='utf-8') for word in f: ablarr.append(word) ablgender.append("n") s = random.randint(0,len(ablarr)-1) pick = ablarr[s][:-1] while sylls(pick) < min_syllables or sylls(pick) > max_syllables: s = random.randint(0,len(ablarr)-1) pick = ablarr[s][:-1] NEXTGENDER=ablgender[s] return pick else: accarr = [] accgender = [] f = open("adj_f_acc.txt", 'r', encoding='utf-8') for word in f: accarr.append(word) accgender.append("f") if ISANIM: f = 
def punctuation(isend):
    """Return a random punctuation mark to close a verse line.

    Args:
        isend: the string "nonend" allows a comma in addition to the
            sentence-final marks. Any other value (callers pass "end")
            excludes the comma.

    Returns:
        One of ".", "?", "!", "..." and, for "nonend" only, ",". Each
        allowed mark is equally likely.
    """
    marks = [".", "?", "!", "..."]
    if isend == "nonend":
        marks.append(",")
    # Choosing from the allowed marks replaces the original loop, which
    # redrew until it got something other than a comma.
    return random.choice(marks)
# Text "ISO 639-3", then the shortest run of any characters (newlines
# included), then the sil.org documentation link. The three characters
# after "id=" are captured.
_ISO_CODE_RE = re.compile(
    r'ISO 639-3[\s\S]*?'
    r'http://www-01\.sil\.org/iso639-3/documentation\.asp\?id=(...)')


def getcode(filename):
    """Extract the ISO 639-3 code from a saved HTML page.

    Args:
        filename: path of a UTF-8 encoded file to search.

    Returns:
        The three characters following ``id=`` in the first sil.org
        documentation link that appears after the text "ISO 639-3".

    Raises:
        ValueError: if the file contains no such link. The original failed
            with AttributeError on ``None`` in that case.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        page = f.read()
    found = _ISO_CODE_RE.search(page)
    if found is None:
        raise ValueError('no ISO 639-3 link found in {!r}'.format(filename))
    return found.group(1)
def freq():
    """Return the most frequent word in the list produced by ``nouns()``.

    When several words share the highest count, the one that occurs first
    in the list wins, as in the original nested-loop version.

    Returns:
        The most frequent word, or ``None`` when ``nouns()`` returns an
        empty list. The original raised UnboundLocalError in that case.
    """
    from collections import Counter

    # One counting pass instead of rescanning the whole list for each word.
    counts = Counter(nouns())
    if not counts:
        return None
    # Counter keeps first-seen order and max() returns the first maximum,
    # which preserves the original tie-breaking.
    return max(counts, key=counts.get)
# A markup tag: "<", any characters other than ">" (newlines included), ">".
_TAG_RE = re.compile(r'<[^>]*>')


def preprocessing(content):
    """Strip markup tags and line breaks from *content*.

    Tags are removed first. Every newline character is then deleted
    without inserting a replacement, so text on neighbouring lines is
    joined directly.

    Args:
        content: the raw text of one document.

    Returns:
        The text without tags and without newline characters.
    """
    # str.replace is sufficient for a literal newline; no regex is needed.
    return _TAG_RE.sub('', content).replace('\n', '')
ews(): with open('words3.txt','r', encoding = 'utf-8') as ws3: wss3=[] for line in ws3: line = line.strip() wss3.append(line) return random.choice(wss3) def verb(): with open('words2v.txt','r', encoding = 'utf-8') as ws2v: wss2v=[] for line in ws2v: line = line.strip() wss2v.append(line) return random.choice(wss2v) def p51(): pros=random.choice([1, 2, 3]) if pros == 1: return qws()+' '+wws()+' '+verb() elif pros == 2: return verb()+' '+wws()+' '+qws() else: return wws()+' '+qws()+' '+verb() def p52(): pr=random.choice([1, 2]) if pr == 1: return ews()+' '+verb() else: return verb()+' '+ews() def p5(): prost = random.choice([1, 2]) if prost == 1: return p51() else: return p52() def very(): with open('ochen.txt','r', encoding = 'utf-8') as och: oche=[] for line in och: line = line.strip() oche.append(line) return random.choice(oche) def red(): with open('adjn.txt','r', encoding = 'utf-8') as adj: adjs=[] for line in adj: line = line.strip() adjs.append(line) return random.choice(adjs) def plat(): with open('pla.txt','r', encoding = 'utf-8') as pla: plas=[] for line in pla: line = line.strip() plas.append(line) return random.choice(plas) def znak(): zn = [".", "!", "..."] return random.choice(zn) def p7(): return very()+' '+red()+' '+plat()+znak() def maybe(): with open('maybe.txt','r', encoding = 'utf-8') as may: be=[] for line in may: line = line.strip() be.append(line) return random.choice(be) def sun(): with open('pla.txt','r', encoding = 'utf-8') as suns: sunn=[] for line in suns: line = line.strip() sunn.append(line) return random.choice(sunn) def fin(): with open('fin.txt','r', encoding = 'utf-8') as vse: vses=[] for line in vse: line = line.strip() vses.append(line) return random.choice(vses) def last(): return maybe()+', '+sun()+' '+fin() def poem(): print (p5()) print (p7()) print (p5()) print (p7()) print (last()) poem() import re from math import log punct = '[.,!«»?&@"$\[\]\(\):;% def preprocessing(text): longwords=[] text_wo_punct = re.sub(punct, '', 
def first(dic, limit=100):
    """Return the most frequent entries of a frequency dictionary.

    The original walked the keys in descending frequency order but did
    nothing inside the loop and returned *dic* unchanged. This version
    returns the truncated dictionary.

    Args:
        dic: mapping from word to its count.
        limit: maximum number of entries to keep (default 100).

    Returns:
        A new dict holding at most *limit* entries of *dic*, ordered from
        most to least frequent. Entries with equal counts keep the order
        they have in *dic*. *dic* itself is not modified.
    """
    ranked = sorted(dic, key=lambda m: -dic[m])[:limit]
    return {word: dic[word] for word in ranked}
def bigram_dict(words):
    """Count adjacent word pairs in *words*.

    Args:
        words: list of word strings in text order.

    Returns:
        A dict mapping each bigram, written as the two words joined by a
        single space, to the number of times it occurs. Sequences with
        fewer than two words give an empty dict.
    """
    bigram_freq = {}
    # Pair every word with its successor. The original index range stopped
    # one position early and never counted the final pair.
    for left, right in zip(words, words[1:]):
        pair = ' '.join([left, right])
        bigram_freq[pair] = bigram_freq.get(pair, 0) + 1
    return bigram_freq
> 100: break try: pmi_anek = pmi_for_cats(bigram, 'anek') except KeyError: pmi_anek = 0 try: pmi_teh = pmi_for_cats(bigram, 'teh') except KeyError: pmi_teh = 0 try: pmi_izvest = pmi_for_cats(bigram, 'izvest') except KeyError: pmi_izvest = 0 max_pmi = max(pmi_anek, pmi_teh, pmi_izvest) if max_pmi == 0: continue if max_pmi == pmi_anek: cat = 'anek' elif max_pmi == pmi_teh: cat = 'teh' elif max_pmi == pmi_izvest: cat = 'izvest' print(bigram, cat, pmi_for_cats(bigram, cat)) i += 1 a=[] for i in range (8): a.append(input()) b=[] for i in range (0,7,2): b.append(a[i]) b.append(a[i+1]) print (''.join(b)) b=[] import re regsit=r"\bси(жу|ди((шь|м|те?)?)|е(л[и,а,о]?|ть|в(ш(и(й|е|х|ми?)?))|ая|е(й|е|му?|го)))?|я(т|щ(и(й|ми?|х|е)|е(е|й|го|му?)|ая|ую))\b" def tekst(): with open ('new.txt', 'r', encoding='utf-8') as f: text=f.readlines() found=[] for line in text: words=line.split() for word in words: wor=word.lower() wor=wor.strip(".,?!:;-") if re.search (regsit, wor)!=None and found.count(wor)==0: found.append(wor) print(wor) tekst() print ('Введите число') n=int(input()) k=1 while k*2<=n: k=k*2 print (k) continue with open('freq.txt', 'r', encoding = 'utf-8') as f: print ('Задание 1') for line in f: form = [] form = line.split(' | ') if form[1] =='союз': print (' | '.join(form)) with open('freq.txt', 'r', encoding = 'utf-8') as f: sum=0 fem = [] morp = [] print ('Задание 2') for line in f: form = [] form = line.split(' | ') morp = form[1].split(' ') if len(morp)>2: if morp[2]=='ед' and morp[3]=='жен': fem.append(form[0]) sum += float(form[2]) print (', '.join(fem)) print('Сумма ipm:', sum) word = input ('Введите слово: ') print (word) while len(word) > 1: word=word [1:len(word)-1] print (word) import re reg=r'(\n\n

(.*?)(.*?)

' m = re.search(links,content) if m != None: return m.group(3) def result(): with open ('Результат.txt', 'w', encoding = 'utf - 8')as file: result = open_s() return file.write('Преподаватели:'+ result) open_s() result() import re def open_text(): with open('Leskov.txt', 'r', encoding = 'utf - 8') as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip(',.”"?!-:;') return arr def words(): arr = open_text() return len(arr) print(words()) def frequency(): arr = open_text() d = {} for i in range(len(arr)): if arr[i] not in d: d[arr[i]] = 1 else: d[arr[i]] +=1 return d def result(): with open ('Результат.csv', 'w', encoding = 'utf - 8')as file: d = frequency() for key in sorted(d): file.write(key + ',' + str(d[key])+ '\n') return result() def phrase(): with open('Leskov.txt', 'r', encoding = 'utf - 8') as f: text = f.read() with open ('Результат.txt', 'w', encoding = 'utf - 8')as file: reg = re.findall(r'\b\w*аго \w*(?:и|а|ы)',text) for reg return reg phrase() def puzzle(): phrase = {'незванный':'гость','розовый':'слон','вишнёвый':'сад', 'сиреневый':'туман', 'кленовый':'лист'} for key in phrase: for i in range(len(key)): print(key + '...') w = input('Я загадал слово ') if w == phrase[key]: return print('Ты выиграл') else: print ('Ты проиграл') return puzzle() def open_text(): with open('Книга1.csv', 'r', encoding = 'utf - 8') as f: line= f.readline() arr = line.split(';') for i, w in enumerate(arr): arr[i] = arr[i].strip('\n') line = f.readline() arr1 = line.split(';') phrase = dict(zip(arr,arr1)) return phrase def puzzle(): phrase = open_text() for key in phrase: for i in range(len(key)): print(key + '...') w = input('Я загадал слово ') if w == phrase[key]: return print('Ты выиграл') else: print ('Ты проиграл') return puzzle() import re def open_s(): with open ('Лингвистика — Википедия.html','r', encoding = 'utf - 8')as f: text = f.read() return text def lang_meat(): m = open_s() step = 
re.sub(r'\bязык(а(ми?|х)?|у|о(м|в)|е|и)?\b',r'\bшашлык\1',m, flags = re.DOTALL) step2 = re.sub(r'\bЯзык(а(ми?|х)?|у|о(м|в)|е|и)?\b',r'\bШашлык\1', step, flags = re.DOTALL) return step2 def result(): with open ('Результат.txt', 'w', encoding = 'utf - 8')as file: result = lang_meat() return file.write(result) result() first = 0 second = 0 f = open( "Капибара.txt", "r", encoding = "utf-8") for line in f: arr = line.split() for i in arr: if len(i) == 3 and i[len(i)-1] != ',' and i[len(i)-1] != '.' and i[len(i)-1] != ':' and i[len(i)-1] != ';' and i[len(i)-1] != '!' and i[len(i)-1] != '?': first += 1 if len(i) == 4 and i[len(i)-1] == ',' for i[len(i)-1] == '.' or i[len(i)-1] == ':' or i[len(i)-1] == ';' or i[len(i)-1] == '!' or i[len(i)-1] == '?': first += 1 if len(i) == 1 and i != '―': second += 1 if len(i) == 2 and i[len(i)-1] == ',' or i[len(i)-1] == '.' or i[len(i)-1] == ':' or i[len(i)-1] == ';' or i[len(i)-1] == '!' or i[len(i)-1] == '?': second +=1 if second == 0: print( 'Слов длины один нет') else: num = first/second print(num) f.close() import re import os def folder(): arr = [f for f in os.listdir('.')if re.search(r'[а-яёЁА-Я]+',f)if os.path.isdir(f)] print(len(arr)) return arr folder() def print_result(): result = [] for f in os.listdir('.'): if os.path.isfile(f): f = f[:f.rfind('.')] if f not in result: result.append(f) else: if f not in result: result.append(f) return ' '.join([str(i) for i in result]) print(print_result()) import os import re def text(): for root, dirs, files in os.walk('.'): for f in files: if f.endswith('.xhtml'): with open( f, 'r') as text: text = text.read() reg = re.findall(r'', text) with open('Exam.txt', 'w', encoding = 'utf-8') as f2: f2.write(f +'\t'+str(len(reg))+ '\n') return text() def table(): for f in os.listdir('.'): with open( f, 'r') as text: text = text.read() reg1 = re.findall(r' ([А-Яа-яёЁ]*)\.', text) reg2 = re.findall(r'([0-9]*)', text) for i in reg1 and j in reg2: with open ('Результат.csv', 'w', encoding = 'utf - 
8')as file: file.write( f + ',' + i + ',' + j+ ','+ '\n') return table() n = 0 f = open( "Цитаты.txt", "r", encoding = "utf-8") for line in f: arr = line.split ('—') arr2 = arr[0].split() if len(arr2) < 10: print (arr[0]) arr3 = line.split() for i in arr3: if i == 'разум': n += 1 print (n) f.close() import re def open_s(): with open ('Капибара — Википедия.html','r', encoding = 'utf - 8')as f: text = f.read() reg = r'(.*?)' m = re.findall(reg,text) return m print (open_s()) for link in links [:10]: print(link[0] for link in links[:10]: print(link[2],'-->', link[1] import os def delete(dirname): for root, dirs, files in os.walk(dirname): for f in files: os.remove(os.path.join(root, f)) for d in dirs: delete(os.path.join(root,d)) os.rmdir(root) delete('кот') def print_tree(dirname, space = 0): for root, dirs, files in os.walk(dirname): print(''*root) for i in files: print(''*space,' **()'.format(i) space += 2 import os def task_0(): print(os.listdir('.')) task_0() def task_1(): sent = input('Введите предложение:') arr = sent.split() path = '\\'.join([str(i) for i in arr]) os.makedirs(path) task_1() def task_2(): n = int(input()) arr = [] for i in range(n): arr.append(i) path = '\\'.join([str(i) for i in arr]) os.makedirs(path) task_2() def count_tf(word, text): return text.count(word) / len(text) def count_df(word, texts): n = [1 for text in texts if word in text] return sum(n) def count_idf(word, texts): n = len(texts) / (1 + count_df(word, texts)) return n from math import log def count_tfidf(word, text, texts): tf = count_tf(word, text) idf = count_idf(word, texts) return log(tf, 10) * log(idf, 10) import re punct = '[.,!«»?&@"$\[\]\(\):;% def preprocessing(text): text_wo_punct = re.sub(punct, '', text.lower()) word = text_wo_punct.strip().split() words = [i for i in word if len(i)>4 and re.search(r'[1-9]+', i) is None] return words import os texts_dic = {} for root, dirs, files in os.walk('wikipedia'): for f in files[:50]: with open(os.path.join(root, f), 'r', 
encoding='utf-8') as t: text = preprocessing(t.read()) texts_dic[f.split('.')[0]] = text texts = list(texts_dic.values()) for text in texts_dic: print("Top words in document {}".format(text)) scores = {} for word in texts_dic[text]: scores[word] = count_tfidf(word, texts_dic[text], texts) sorted_words = sorted(scores.items(), key=lambda x: x[1]) for word, score in sorted_words[:5]: print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5))) def open_text(): with open('text.txt', 'r', encoding = 'utf - 8') as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip(',.?!-') return arr def first_letter(letter): arr = open_text() for i in arr: if letter == i[0]: print (i) else: pass return i letter = input('Введите букву') def c(): cont = input('Введите страну ') d = {'Россия': 'Москва','Германия' : 'Берлин','Италия':'Рим', 'Франция': 'Париж', 'Азербайджан': 'Баку'} if cont in d: return d[cont] else: return 'NO' def change(): d = {'Россия': 'Москва','Германия' : 'Берлин','Италия':'Рим', 'Франция': 'Париж', 'Азербайджан': 'Баку'} d1 = {} for key in d: city = d[key] d1[city] = key return d1 def delete_doubles(): d = { 'Петя': 12345, 'Пётр': 12345, 'Аня': 54321, 'Анна': 54321, 'Сёма': 13579} arr = [] d1 = {} for key in d: if d[key] in arr: pass else: append d1[key] = d[key] return d1 print(delete_doubles()) import re def open_text(): with open('Жирафики.txt', 'r', encoding = 'utf - 8') as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip(',.?!-') return arr def giraf(): s = input('Введите что-нибудь ') regex = 'жираф(а(ми?|х)?|у|е|о[мв]|ами|ы)?' 
m = re.search(regex,s) if m != None: return 'Я нашёл' print(giraf()) def giraf_in_text(): arr = open_text() regex = r'\bжираф(а(ми?|х)?|у|е|о[мв]|ами|ы)?\b' m = re.search(regex,i[arr]) s = 0 for i[arr] in arr: if m != None: s += 1 return s print(giraf_in_text()) import re def open_s(): with open ('Динозавры — Википедия.html','r', encoding = 'utf - 8')as f: text = f.read() return text def find_dino(): text = open_s() reg = r'\b[Дд]инозавр[а-я]{0,5}' m = re.findall(reg, text) return m print (find_dino()) def no_html(): text = open_s() m = re.sub(u'<.*?>', u'', text, flags = re.DOTALL) return m print (no_html()) def cat_dino(): m = no_html() n = re.sub(r'\bдинозавр',r'\bкот',m,flags = re.DOTALL) n1 = re.sub(r'\bДинозавр',r'\bКот',n, flags = re.DOTALL) return n1 print(cat_dino()) import re def open_text(): with open('Гоголь.txt', 'r', encoding = 'utf - 8') as f: text = f.read() text = re.sub(r'\n',' ', text) arr = re.split('\.|\?|\! ', text) for i, s in enumerate(arr): arr[i] = re.sub(r'[:;,.?!— -]',' ', arr[i]) return arr def words_5(): arr = open_text() for i in range(len(arr)): arr1 = arr[i].split() template = '{}_{}' length =[template.format(arr1[i],len(arr1[i])) for i,w in enumerate(arr1)] for i in range(len(length)): print(length[i]) return words_5() import re def open_text(): with open('Гоголь.txt', 'r', encoding = 'utf - 8') as f: text = f.read() text = re.sub(r'\n',' ', text) arr = re.split('\.|\?|\! 
', text) for i, s in enumerate(arr): arr[i] = re.sub(r'[:;,.?!— -]',' ', arr[i]) return arr def words_5(): arr = open_text() for i in range(len(arr)): arr1 = arr[i].split() template = '{}_{}' length ={print(template.format(arr1[i],len(arr1[i]))) for i,w in enumerate(arr1)} return words_5() def open_text(): with open('Austen_Jane.txt', 'r', encoding = 'utf - 8') as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip(',.”"?!-:;') return arr def words(): arr = open_text() arr1 = [] for i,w in enumerate(arr): if arr[i][-4:] == 'hood': arr1.append(arr[i]) return arr1 def number_of_words(): arr1 = words() return len(arr1) def the_minimum_frequency(): arr = open_text() arr1 = words() y = 1 n = 0 for i,w in enumerate(arr1): y = min(y, arr1.count(arr1[i])) for i,w in enumerate(arr1): if y == arr1.count(arr1[i]): n = i return arr1[n] def base(): arr1 = words() arr2 = [] for i,w in enumerate(arr1): x = arr1[i].rfind('h') arr2.append(arr1[i][:x]) return arr2 print(number_of_words()) print(the_minimum_frequency()) print(' '.join(map(str,(base())))) arr = [] word = input('Введите слово') while word: arr.append (word) word = input( 'Введите слово') for w in range(len(arr)): print(arr[w][w+1::]) word=input("Введите слово: ") n=[] for i in range(len(word)): n=word[len(word)-i:] n+=word[i:] print(n) print ("Введите число") num=int(input()) print ("Введите слово") w=input() while w!= "программирование" and w!= "программирование": for i in range(num): print (w) print ("Введите число") num=int(input()) print ("Введите слово") w=input() print ("Конец") w = [] while True: word =(input('Введите латинское слово: ')) if len (word) ==0: break elif word[-2:]== 're' or word [-2:]=='ri': w.append(word) for i in range (len(w)): print (w[i]) def open_text(name): with open (name+'.txt', 'r', encoding ='utf-8') as f: text=f.read() ntext=text.lower() words=ntext.split(' ') for i,word in enumerate (words): words[i]=word.strip('.,!?-') return 
words def edwords(a): ed=[] edlist=int() for i,word in enumerate (a): if word.endswith('ed'): ed.append(word) edlist+=1 print ('Количество форм на -ed равно',str(edlist)) return (ed) def iedwords(b): iedlist=int() for i,word in enumerate (b): if word.endswith('ied'): iedlist+=1 print ('Количество форм, образованных от глаголов на -у или -е равно',str(iedlist)) return () def end(): name=input('Введите название файла: ') a=open_text(name) b=edwords(a) c=iedwords(b) return (c) u=end() with open("text.txt", "r", encoding="utf-8") as f: text=f.read() words=text.split(' ') words_num=len(words) letters=list(text) marks_num=int() for i in letters: if i=="." or i==",": marks_num+=1 percent=marks_num/words_num*100 print('Процент слов, имеющих знак препинания: ', round(percent)) import random def bigram(): b={} with open ('text.csv', 'r') as f: lines=f.readlines() for line in lines: line=line.split(',') b[line[0]]=line[1] return(b) def dots(w): res='' for i in range(len(w)): res+='. ' return res def rand(b): k=list(b.keys()) return random.choice(k) print ('Сейчас мы сыграем в игру "Угадай слово"!') big=bigram() word=rand(big) print ("Подсказка:") print (big[word]+' '+ dots(big[word])) answer=input('Как вы думаете, что это за слово? 
') if answer==word: print ("Правильно!") else: print ('Увы, неправильно!') import os import re def sents(): news = 'news' sent = {} for n in os.listdir(news): with open(os.path.join(news, n), encoding='cp1251') as text: text = text.read() sent[n] = len(re.findall('', text)) return (sent) def new (sent): with open('new_file', 'w', encoding = 'utf-8') as new: for s in sent: new.write(s+'\t'+str(sent[s])+'\n') new(sents()) import re def oh(): lines=int() with open ('text.txt', 'r', encoding='utf-8') as first: old_text=first.readlines() for line in old_text: if '' in line: break else: lines+=1 with open ('endtext.txt', 'w', encoding='utf-8') as second: lines1=str(lines) second.write(lines1) def oops(): slov={} with open ('text.txt', 'r', encoding='utf-8') as first: old_text=first.readlines() for line in old_text: if " 2) and (usl[2] == 'ед') and (usl[3] == 'жен'): mass.append(arr[0]) sum += float(arr[2]) print(', '.join(mass)) print('Сумма ipm = ', sum) n=input('Введите любое число. ') n=int(n) while n != 0 : sl=input('Введите любое слово. 
') if sl == 'программирование': break print(sl) n=n-1 word = input("Введите слово: ") for k in range(len(word)): newword = (word[-k: ] + word[ :-k]) print(newword) import re import os def sent_count(): path = './news/' for root, dirs, files in os.walk(path): for f in files: with open(os.path.join(root, f), 'r', encoding = 'cp1251') as t: text = t.read() mass = [] mass = text.split('\n') s_count = 0 for i in mass: if re.search('', i): s_count += 1 with open('result.txt', 'a', encoding = 'utf-8') as file: file.write(f +'\t' + str(s_count) + '\n') def write_csv(): with open('result.csv', 'w', encoding = 'utf-8') as file: output = csv.writer(file, delimiter = ',') head = ['Название файла', 'Автор', 'Тематика текста'] path = './news/' for root, dirs, files in os.walk(path): for f in files: with open(os.path.join(root, f), 'r', encoding = 'utf-8') as t: text = t.read() if re.search('', text): auth = re.search('', text).group(1) def main(): sent_count() main() s = 8 p = input ("введите число") p = int (p) while p!=s: if p < s: print ("больше") else: print ("меньше") p = input ("ещё раз") if len (p) == 0: print ("всё") break p = int (p) if p==s: print ("вы выиграли") print (range (10)) import os direct = 'D:\Downloads\news.zip\news' files = os.listdir(direct) for file in files: f = open(file, 'r') sent = 0 for line in f: if '. 
' in line: sent += 1 f.close() f = open('sent.txt', 'a') f.write(file, ' ', sent, '\n') f.close() A = [0] * 7 for i in range(7): A[i] = int(input("введите число")) for i in range(7): B = ["X"] * A[i] if A[i] < 0: print ("введено отрицательное число") else: print (''.join([str(i) for i in B])) s = input ("введите число") s = int (s) for i in range (10): i += 1 p = i*s print (i, "*", s, "=", p) words = 0 cap_words = 0 for line in open('text.txt','r', encoding='utf-8'): word = line.split(' ') for word in open('text.txt','r', encoding='utf-8'): words +=1 if word.istitle(): cap_words += 1 s = (cap_words/words)*100 print ("слов, начинающихся с заглавной буквы", s, "%") a = input ("введите a") b = input ("введите b") c = input ("введите c") a = int (a) b = int (b) c = int (c) if c == a % b: print ("a даёт остаток c при делении на b") else: print ("a НЕ даёт остаток c при делении на b") if c == a/b: print ("a разделить на b равно c") else: print ("a разделить на b НЕ равно c") def linecount (text): lines = 0 for line in text: lines += 1 return lines def freq (text): A = dict() for line in text: if " 3): dic[element] += 1 elif len(element)> 3: dic[element] = 1 return dic def delete (dic): dic1=dic.copy() for word in dic1: if dic1[word] == 1: del dic[word] return dic corpus_freq = freq_dict(words) anek_freq = freq_dict(words_anek) izvest_freq = freq_dict(words_izvest) teh_freq = freq_dict(words_teh) delete (corpus_freq) delete(anek_freq) delete(izvest_freq) delete(teh_freq) def pmi_for_cats(x, y): if y == 'anek': dic = anek_freq arr = words_teh + words_izvest num = num_anek elif y == 'teh': dic = teh_freq arr = words_anek + words_izvest num = num_teh elif y == 'izvest': dic = izvest_freq arr = words_teh + words_anek num = num_izvest p_xy = dic[x]/len(arr) p_x, p_y = corpus_freq[x]/len(words), num/(num_izvest + num_teh + num_anek) pmi = log(p_xy/(p_x * p_y)) return pmi cat_pmi = {} i = 0 for word in corpus_freq: if i > 100: break try: pmi_anek = pmi_for_cats(word, 'anek') 
except KeyError: pmi_anek = 0 try: pmi_teh = pmi_for_cats(word, 'teh') except KeyError: pmi_teh = 0 try: pmi_izvest = pmi_for_cats(word, 'izvest') except KeyError: pmi_izvest = 0 max_pmi = max(pmi_anek, pmi_teh, pmi_izvest) if max_pmi == 0: continue if max_pmi == pmi_anek: cat = 'anek' elif max_pmi == pmi_teh: cat = 'teh' elif max_pmi == pmi_izvest: cat = 'izvest' print(word, cat) i += 1 word = input ("Введите слово: ") a = 0 b = len (word) while word [a:b] != "": print (word [a:b]) a += 1 b -=1 A = [] i = 0 print ('Enter 8 words') while i < 9: list.append(A, input()) i+=1 i = 1 while i < 9: print (A[i-1]+A[i]) i+=2 a = int (input ("Введите число a: ")) b = int (input ("Введите число b: ")) c = int (input ("Введите число c: ")) if (a/b == c): print ("a / b = c") else: print ("a / b != c") if (a ** b == c): print ("a ^ b = c") else: print ("a ^ b != c") def questions(): file = input('Введите название файла на английском: ') leng = int(input('Введите длину слова: ')) quant = open_file(file) output = perc(quant, leng) return output def open_file(file): f = open(file, 'r') file = f.read() file = file.split() return file def perc(quant, leng): i = 0 j = 0 for item in quant: if item.startswith('un'): i += 1 if len(item) > leng: j += 1 if i != 0: print ('Количество слов, начинающихся с un-, в тексте: ', i) return round(j / i * 100) else: return 'В тексте нет слов, начинающихся на un-' print('Проценты: ', questions()) import random def read (): f = open('text.txt', 'r') l = f.readlines() return l def array (numb): a = read()[numb].split() return a def noun2 (): return random.choice(array(0)) def noun3 (): return random.choice(array(1)) def noun4 (): return random.choice(array(2)) def imper2 (): return random.choice (array(3)) def imper3 (): return random.choice(array(4)) def imper4 (): return random.choice(array(5)) def verb2 (): return random.choice(array(6)) def verb3 (): return random.choice(array(7)) def verb4 (): return random.choice(array(8)) def adverb1 (): return 
random.choice (array(9)) def adverb2 (): return random.choice (array(10)) def adverb3 (): return random.choice (array(11)) def adverb4 (): return random.choice (array(12)) def punct(): marks = [".", "?", "!", "..."] return random.choice(marks) def verse_5_1 (): return imper3() + ' ' + noun2() + punct() def verse_5_2 (): return imper2() + ' ' + noun3() + punct() def verse_5_3 (): return verb2() + ' ' + noun3() + punct() def verse_5_4 (): return verb3() + ' ' + noun2() + punct() def verse_5_5 (): return adverb1() + ' ' + verb2() + ' ' + noun2() + punct() def verse_5_6 (): return adverb1() + ' ' + imper4() + punct() def verse_5_7 (): return adverb2() + ' ' + imper3() + punct() def verse_5_8 (): return adverb3() + ' ' + imper2() + punct() def verse_7_1 (): return imper3() + ' ' + noun4() + punct() def verse_7_2 (): return imper4() + ' ' + noun3() + punct() def verse_7_3 (): return verb3() + ' ' + noun4() + punct() def verse_7_4 (): return verb4() + ' ' + noun3() + punct() def verse_7_5 (): return adverb1() + ' ' + verb3() + ' ' + noun3() + punct() def verse_7_6 (): return adverb1() + ' ' + verb4() + ' ' + noun2() + punct() def verse_7_7 (): return adverb1() + ' ' + verb2() + ' ' + noun4() + punct() def verse_7_8 (): return adverb2() + ' ' + verb2() + ' ' + noun3() + punct() def verse_7_9 (): return adverb2() + ' ' + verb3() + ' ' + noun2() + punct() def make_verse_5 (): verse = random.choice([1,2,3, 4, 5, 6, 7, 8]) if verse == 1: return verse_5_1() elif verse == 2: return verse_5_2() elif verse == 3: return verse_5_3() elif verse == 4: return verse_5_4() elif verse == 5: return verse_5_5() elif verse == 6: return verse_5_6() elif verse == 7: return verse_5_7() else: return verse_5_8() def make_verse_7 (): verse = random.choice([1,2,3, 4, 5, 6, 7, 8, 9]) if verse == 1: return verse_7_1() elif verse == 2: return verse_7_2() elif verse == 3: return verse_7_3() elif verse == 4: return verse_7_4() elif verse == 5: return verse_7_5() elif verse == 6: return verse_7_6() elif 
verse == 7: return verse_7_7() elif verse == 8: return verse_7_8() else: return verse_7_9() print(make_verse_5()) print(make_verse_7()) print(make_verse_5()) print(make_verse_7()) print(make_verse_7()) num = int (input ("Введите натуральное чиcло: ")) i = 1 while 2**i < num: print (2**i) i+=1 import re def read_file(): with open ('corp.txt', 'r', encoding='UTF-8') as file: text=file.read() file.close() return text def counter(): file = open('corp.txt', 'r', encoding='UTF-8') i=1 for line in file: if '' not in line: i+=1 else: break file.close() return i def five_points(): new_file=open('подсчет строк.txt', 'w', encoding='utf-8') text=new_file.write('Число строк заголовка: ' + str(counter())) new_file.close() def dictionary(): d={} wordlist=[] lemmas=re.findall('>\w+', read_file()) for lemma in lemmas: lemma=lemma.strip('>') wordlist.append(lemma) for word in wordlist: if word in d: d[word]+=1 else: d[word]=1 return d def eight_points(): d=dictionary() dic_file=open('словарик.txt', 'w', encoding='utf-8') for key in d: text=dic_file.write(key+' - '+ str(d[key])+' \n') dic_file.close() def ten_points(): formlist=[] file = open('corp.txt', 'r', encoding='UTF-8') for line in file: pronom=re.search('type="(f.h.+?)"', line) if pronom != None: find=pronom.group(1) formlist.append(find) return formlist five_points() eight_points() print ('Загляните в папку с программой и попробуйте найти в ней новые txt-файл.') print (ten_points())import os def lists_creator(): aaa=[] punct=[] file_list=[files for root, dirs, files in os.walk('/home/lera/Рабочий стол/Загрузки')] for folder in file_list: for file in folder: q_a=0 q_punct=0 for word in file: for letter in word: if letter=='a' or letter =='A' or letter =='А' or letter =='а': q_a+=1 if letter =='.' or letter ==',' or letter =='?' or letter =='!' 
or letter =='(' or letter == ')' or letter =='-': q_punct+=1 if q_a>3: aaa.append(file) if q_punct-1>0: punct.append(file) print ('+++++++++++++Файлы, в которых большк 3х "а":+++++++++++++') for el in aaa: print (el) print ('+++++++++++++Файлы со знаками препинания в названии:+++++++++++++') for el in punct: print(el) def kracuvo(): for roots, dirs, files in os.walk('/home/lera/Рабочий стол/Загрузки'): for dir in dirs: print('--',dir) path='/home/lera/Рабочий стол/Загрузки'+'/'+str(dir) for file in os.listdir(path): print (' ', file) print(os.path.join('дз ап', 'morozova3.docx'))import os import re def s_counter_5(): s_result = open('res.txt', 'w', encoding='utf-8') for root, dirs, files in os.walk('news'): s_result = open('res.txt', 'w', encoding='utf-8') for file in files: with open(os.path.join('news',file), 'r') as f: file_text = f.read() q=0 for line in file_text: if line=='.' or line=='?' or line=='!': q+=1 text=s_result.write(file +'\t'+str(q)+ '\n') s_result.close() return s_result def table_8(): table = open('table.csv', 'w', encoding='utf-8') for root, dirs, files in os.walk('news'): for file in files: with open(os.path.join('news',file), 'r') as f: file_text = f.read() info=re.findall('.+', file_text) for el in info: a=re.search('>([a-яА-Я]+.[a-яА-Я]+)?\.', el) if a!=None: author=a.group(1) else: author='no author' text=table.write(author+'\n') s_counter_5 table_8() import re def file_name(): print ('Поместите файл в одну папку с данной программой.\nВведите имя файла, чтобы получить список словоформ:') name=input() return name def read_file(): wordlist=[] file = open(file_name(), 'r', encoding='UTF-8') for line in file: linelist=line.split() for word in linelist: word=word.lower() word=word.strip('.,:;"«»-?()!') wordlist.append(word) file.close() return wordlist def form_finder(): form_list=[] for word in read_file(): form=re.search('(не(до)?|под)?вып[еиь]([йтлеюи]|(вш))[мшьаоиыуе]?(го|м(у|и)?[ейяюх])?(ся)?', word) if form!=None: find=form.group() 
form_list.append(word) return form_list def list_without_repetitions(): list=form_finder() for el in list: el_new=el for el in list: if el_new==el: list.remove(el) return list for el in list_without_repetitions(): print(el)print ('Введите число') n=int(input()) for i in range(n): print('Введите слово') a=input() print ('Ваше слово:', a) if a=='программирование': break print ('Цикл завершен')f=open('wordlist.txt', 'r', encoding='utf-8') for line in f: arr = line.split() for i,word in enumerate(arr): arr[i] = word.strip('.,?!;:-"') for el in arr: el=el.lower() print (el) import random def open_file(): file = open('wordlist.txt', 'r', encoding='UTF-8') lines = file.readlines() file.close() return lines def random_word(lines): ugly_word = random.choice(lines) word = ugly_word.strip('\n') return word def syllable_counter(word): syl_quan=0 for letter in word: if letter=='e' or letter=='y' or letter=='u' or letter=='i' or letter=='o' or letter=='a' or letter=='é' or letter=='è' or letter=='ê' or letter=='à' or letter=='â' or letter=='ù' or letter=='û' or letter=='ô' or letter=='î': syl_quan+=1 return syl_quan def line_creator(syl_number): syl_max = syl_number line = '' while syl_max >= 0: word = random_word(open_file()) syl_quan = syllable_counter(word) syl_max -= syl_quan if syl_max > 0: line=line+' '+ word continue elif syl_max == 0: line = line + ' ' + word break elif syl_max < 0: line = '' syl_max = syl_number continue punctuation=['!','.','?'] phrase=line[1].upper()+line[2:]+random.choice(punctuation) print (phrase) def main(): print('\nThere you can see one more perfect creation:\n') line_creator(5) line_creator(7) line_creator(5) if __name__ == '__main__': main() import os import re for item in files: file_name=item.split('.') if len(file_name[0])==5: lat=re.search('[A-Za-z]{5}', file_name[0]) if lat!=None: i+=1 if file_name[0] not in name_base: name_base.append(file_name[0]) print ('Число файлов с названием из пяти латинских символов: ',i) print ('\nСписок 
названий найденных файлов (без повторов):') for el in name_base: print (el) import os dirlist = [el for root, dirs, files in os.walk('.') for el in dirs] stat = {} letters = 'qwertyuiopasdfghjklzxcvbnmйцукенгшщзхъфывапролджэячсмитьбю' letter = [name[0].lower() for name in dirlist] for el in letter: if el not in letters: letter.remove(el) if el in stat: stat[el] += 1 else: stat[el] = 1 i = 0 res = 0 for value in stat: if stat[value] > i: i = stat[value] res = value if i==0: print ('Названий, начинающихся с букв, похоже, тут нет :(') else: print('Чаще всего названия папок начинаются с буквы:', res, '\nТакие названия встречаются', i, 'раз(a)') word=input('Введите слово: ') if word: for i in range(len(word)): print (word[i:]+word[:i]) if i>len(word)-1: break else: print ('Нет входных данных')def read_words(): wordlist=[] file = open('austen.txt', 'r', encoding='UTF-8') for line in file: linelist=line.split() for word in linelist: wordlist.append(word) file.close() return wordlist def counter(part): quan=0 for word in read_words(): if word[-len(part):]==part: quan+=1 return quan print ('Число форм в данном тексте, оканчивающихся на -ed: ',counter('ed')) print ('Из них - правильные глаголы в прошедшем времени на -y:',counter('ied'))import re def file_name(): print ('Поместите файл в одну папку с данной программой.\nВведите имя файла, чтобы получить список cфер деятельности данного ученого:') name=input() return name def reader(): list=[] file = open(file_name(), 'r', encoding='UTF-8') for line in file: line=line.strip('\n') list.append(line) file.close() return list def str_sphere(): infobox=reader() sphere='' q=0 for line in infobox: if 'Научная сфера:' in line: sphere=infobox[q+2] break else: q+=1 return sphere def main(): form=re.findall('>[а-я -]+', str_sphere()) list='' for el in form: el=el.strip('>.+',i) form2=re.findall('ana',i) for el in form2: num_ana+=1 if form1!=None: num_w+=1 koef=num_ana/num_w return koef def freq_dict_8(): d={} list=[] new_list=[] for i in 
reader(): form=re.search('gr="(.+)"',i) if form!=None: list.append(form.group(1)) for el in list: i = el.split(',') new_list.append(i) keys=[item[0].strip('=qwertyuiopasdfghjklzxcvbnm/<>" ') for item in new_list if item!='NUM=nom" /> 0 : print('X'*nlist[i]) else: print('') i += 1 import re def openfile_lines(fname): with open(fname, 'r', encoding = 'utf-8') as f: lines = f.readlines() return lines def find_words(lines): words = [] for i in range(len(lines)): if re.search('(.+?)<', words[i]): found_lemma = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(1) found_type = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(2) found_form = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(3) pure.append([found_lemma, found_type, found_form]) return pure def count_forms(words): freq = {} for i in range(len(words)): form = re.search('type="(.+?)"', words[i]).group(1) if form not in freq: freq[form] = 1 else: freq[form] += 1 return freq def plural_adjectives(freqs): forms = list(freqs.keys()) pluradj = [] for i in range(len(forms)): if re.search('l.f.*', forms[i]): adj_form = re.search('l.f.*', forms[i]).group() if adj_form: pluradj.append(adj_form) pluradj_freq = {} for i in range(len(pluradj)): pluradj_freq[pluradj[i]] = freqs[pluradj[i]] return pluradj_freq def main(): lines_dict = openfile_lines('dict.txt') word_list = find_words(lines_dict) pure_info = purify_info_about_words(word_list) freq_dict = count_forms(word_list) pluradj_freq_dict = plural_adjectives(freq_dict) with open('lines.txt', 'w', encoding = 'utf-8') as f: f.write(str(len(lines_dict))) with open('word forms.txt', 'w', encoding = 'utf-8') as f: f.write('\n'.join(freq_dict.keys())) with open('plural adjectives frequencies.txt', 'w', encoding = 'utf-8') as f: text = '' for key in pluradj_freq_dict: text += str(key)+' '+str(pluradj_freq_dict[key])+'\n' f.write(text) with open('dictionary.csv', 'w', encoding='utf-8') as f: header = ['лемма', 
'грамматическая форма', 'словоформа'] f.write(','.join(header)+'\n') for i in range(len(pure_info)): f.write(','.join(pure_info[i])+'\n') if __name__ == '__main__': main() import csv def main(): clues = {} with open('clues.csv', 'r', encoding='utf-8') as f: text = csv.reader(f, delimiter=',') for row in text: clues[row[0]] = row[1] n = 0 keys = list(clues.keys()) while n < len(clues): i = 0 while i <= len(keys[n]): if i < len(keys[n]): response = input(keys[n]+'...') if response == clues[keys[n]]: print('Правильно!') n += 1 break else: print('Неправильно. У тебя ещё '+str(len(keys[n]) - i+1)+' попыток.') i += 1 elif i == len(keys[n]): response = input(keys[n]+'...') if response == clues[keys[n]]: print('Правильно!') n += 1 break else: print('У тебя закончились попытки. Правильный ответ: '+keys[n]+' '+clues[keys[n]]) n += 1 if __name__ == '__main__': main() n = int(input('Введите целое положительное число.')) index = 0 while index < n: index +=1 word = input('Введите слово.') if word == 'программирование': break print(word) word = input('Введите слово в русской раскладке.') index = 0 while index < len(word): index += 1 if word[len(word) - index] != 'з' and word[len(word) - index] != 'я': print(word[len(word) - index]) import os import re import csv def open_file_texts(directory): raw_texts_dict = {} for root, dirs, files in os.walk(directory): for f in files: with open(os.path.join(root, f), 'r', encoding='windows-1251') as t: text = t.read() raw_texts_dict[f] = text return raw_texts_dict def get_sentences(text): sentences = re.findall('(.|\n)+?', text) return sentences def write_out_count_sentences(file_texts_dict): with open('amount of sentences.txt', 'w', encoding='utf-8') as f: for filename in file_texts_dict: text = file_texts_dict[filename] sent_am = len(get_sentences(text)) f.writelines(filename+'\t'+str(sent_am)+'\n') def get_words(raw_text): word_list = [] raw_lines = raw_text.split() word_lines = re.findall('(.+?)((?:\n?[«»,.! 
\?\-])*)', raw_text) for i in range(len(word_lines)): line = word_lines[i][0].strip('').strip('') ana = ana.strip('>').strip().strip('ana').strip() word_list.append([word] + [word_lines[i][1].strip().strip(' ')] + [ana]) return word_list def create_clear_text_out_of_words(word_list): text = [] for el in range(len(word_list)): word = word_list[el] d = re.match('\d+', word[2]) if '«' in word[2]: text.append(word[0] + ' «') elif d: text.append(word[0] + ' ' + d.group(0) +' ') else: text.append(word[0] + word[2] + ' ') return text def find_file_meta (file_texts_dict): file_meta_list = [] for filename in file_texts_dict: text = file_texts_dict[filename] author = re.search('', text) if author: author = re.search('', text).group(1) topic = re.search('', text) if topic: topic = re.search('', text).group(1) file_meta_list.append([filename, author, topic]) return file_meta_list def write_out_file_meta (file_meta_list): with open('file metadata.csv', 'w', encoding='utf-8') as n: text = csv.writer(n, delimiter=';') header = ['Название файла', 'Автор', 'Тематика текста'] text.writerow(header) for row in file_meta_list: text.writerow(row) def find_spec_bigr_in_sentence(word_list): spec_bigr = [] for i in range(len(word_list)): word = word_list[i] if i > 0: previous_word = word_list[i-1] if 'loc' in word[2] and 'PR' in previous_word[2]: spec_bigr.append(previous_word[0]+' '+word[0]) return spec_bigr def find_all_spec_bigr(raw_texts_dict): sbec_bigr = [] texts = raw_texts_dict.values() for text in texts: sentences = get_sentences(text) for sentence in sentences: sentence_word_list = get_words(sentence) sentence_spec_bigr = find_spec_bigr_in_sentence(sentence_word_list) context = create_clear_text_out_of_words(sentence_word_list) for bigr in sentence_spec_bigr: sbec_bigr.append([bigr, context]) return sbec_bigr def write_out_spec_bigr(spec_bigr): with open('bigrams.txt', 'w', encoding='utf-8') as f: for bigr in spec_bigr: f.writelines(bigr[0]+'\t'+bigr[1]+'\n') def main(): 
raw_texts_dict = open_file_texts('news') write_out_count_sentences(raw_texts_dict) file_meta = find_file_meta(raw_texts_dict) write_out_file_meta(file_meta) spec_bigr = find_all_spec_bigr(raw_texts_dict) write_out_spec_bigr(spec_bigr) if __name__ == '__main__': main() words = [] with open('words.txt','r', encoding = 'utf-8') as f: text = f.read() words = text.split('\n') for i in range(len(words)): if ' союз ' in words[i]: print(words[i]) words = [] with open('words.txt','r', encoding = 'utf-8') as f: text = f.read() words = text.split('\n') feminin = [] ipm = 0 word = '' gram = '' ipmi = '' for i in range(len(words)): if 'сущ' in words[i] and 'жен' in words[i]: feminin.append(words[i]) word, gram, ipmi = words[i].split('|') ipm += float(ipmi) for i in range(len(feminin)): print(feminin[i]+',') print(ipm) words = [] with open('words.txt','r', encoding = 'utf-8') as f: text = f.read() words = text.split('\n') words1 = [] word = input('Print any russian word. ') while word: words1.append(word) word = input('Print any russian word. ') for i in range(len(words1)): check = 0 for x in range(len(words)): if words[x].count('|') == 2: word, gram, ipmi = words[x].split('|') if words1[i] == word.strip(' '): print('grammar:', gram.strip(' ')+',' , 'ipm =', float(ipmi)) check = 1 if check == 0: print('This word was not find in the dictionary.') text = input('Type something: ') for i in range(len(text)): print(text[i:]+text[:i]) import re def match_verb_forms(line): infinitive = re.match(r'программировать(ся)?', line, re.I) future = re.match(r'буд(е(шь|те?|м)|ут?) 
программировать', line, re.I) present = re.match(r'программиру(ю|(е(те?|м|шь)))', line, re.I) past = re.match(r'программировал(а|и)?', line, re.I) past_participle = re.match(r'программированн(ая|о(е|й|му?|го)|ы(й|е|ми?|х))', line, re.I) present_participle = re.match(r'программируем(ая|о(е|й|му?|го)|ы(й|е|ми?|х))', line, re.I) transgressive_active = re.match(r'программируя', line, re.I) transgressive_passive_past = re.match(r'будучи программированн(ая|о(е|й|му?|го)|ы(й|е|ми?|х))', line, re.I) transgressive_passive_present = re.match(r'будучи программируем(ая|о(е|й|му?|го)|ы(й|е|ми?|х))', line, re.I) if infinitive and not future: match = infinitive elif future: match = future elif present: match = present elif past: match = past elif past_participle: match = past_participle elif present_participle: match = present_participle elif transgressive_active: match = transgressive_active elif transgressive_passive_past and not past_participle: match = transgressive_passive_past elif transgressive_passive_present and not present_participle: match = transgressive_passive_present else: match = None return match def open_forms(fname): forms = [] with open (fname, 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() forms = text.split() for i in range(len(forms)): forms[i] = forms[i].strip('.,?*()«»') return forms def main(): matches = [] forms = open_forms('test.txt') for i in range(len(forms)-1): if i < len(forms): if match_verb_forms(forms[i] +' '+ forms[i+1]): if match_verb_forms(forms[i] +' '+ forms[i+1]).group() not in matches: matches.append(match_verb_forms(forms[i] +' '+ forms[i+1]).group()) else: if match_verb_forms(forms[i]): if match_verb_forms(forms[i]).group()not in matches: matches.append(match_verb_forms(forms[i]).group()) print(*matches) if __name__ == '__main__': main() import re import os import shutil flist = os.listdir(os.getcwd()) clist = [] cfcount = 0 for n in flist: cyrillic = 1 name = n.split('.')[0] for let in name: if not 
re.match('[А-Яа-яЁё]',let): cyrillic = 0 if cyrillic == 1: if os.path.isdir(n): cfcount += 1 if name not in clist: clist.append(name) print(cfcount) print(clist) import re import csv def open_file(name): with open(name, 'r', encoding='utf-8') as f: file_text = f.read() return file_text def get_words(raw_text): word_arr = [] raw_lines = raw_text.split() word_lines = re.findall('(.+)((?:\n?[«»,.! \?\-])*(?:\n?[01234567])*)', raw_text) for i in range(len(word_lines)): line = word_lines[i][0].strip('').strip(' 0: line[e] = line[e].strip(' />') word_arr.append([line[0]] + [len(line)-1] + [word_lines[i][1].strip().strip(' ')] + line[1:]) return word_arr def count_average_anas(word_arr): total = 0 average = 0 for i in range(len(word_arr)): total += word_arr[i][1] average = total/len(word_arr) return average def count_all_pos(word_arr): pos_dict = {} for i in range(len(word_arr)): for el in range(len(word_arr[i])): if el > 2: pos = re.search('gr="(\w+)', word_arr[i][el]).group(1) if pos not in pos_dict: pos_dict[pos] = 1 else: pos_dict[pos] += 1 with open('parts of speech frequency.txt', 'w', encoding='utf-8') as f: for pos in pos_dict: f.writelines(pos+'\t'+str(pos_dict[pos])+'\n') return pos_dict def make_text(word_arr): text = [] for el in range(len(word_arr)): word = word_arr[el] d = re.match('\d+', word[2]) if '«' in word[2]: text.append(word[0] + ' «') elif d: text.append(word[0] + ' ' + d.group(0) +' ') else: text.append(word[0] + word[2] + ' ') return text def find_all_instr(word_arr, text): instr_words_dict = {} for n in range(len(word_arr)): word = word_arr[n] for i in range(len(word)): if i > 1: instr = re.search('ins', word[i]) if instr: if word[0] not in instr_words_dict: instr_words_dict[word[0]] = [n] print(instr_words_dict[word[0]]) elif n not in instr_words_dict[word[0]]: instr_words_dict[word[0]] += [n] print(instr_words_dict[word[0]]) with open('words in instrumentalis.txt', 'w', encoding='utf-8') as f: for word in instr_words_dict: for i in 
range(len(instr_words_dict[word])): x = instr_words_dict[word][i] y = min(x+4, len(text)-1) f.writelines(''.join(text[x-3:x])+'\t'+word+'\t'+''.join(text[x+1:y])+'\n') return instr_words_dict def main(): raw_text = open_file('text.xml') word_arr = get_words(raw_text) average_anas = count_average_anas(word_arr) print(average_anas) count_all_pos(word_arr) find_all_instr(word_arr, make_text(word_arr)) if __name__ == '__main__': main() import re def open_text_phrases(fname): phrases = [] with open (fname, 'r', encoding = 'utf-8') as f: text = f.read() text = re.sub('\.\.\.|[\.\?]', '!', text) phrases = text.split('!')[:-1] for i in range(len(phrases)): phrases[i] = re.sub('[<>\*\.«»,\'\"]','', phrases[i]) phrases[i] = phrases[i].strip() return phrases def main(): phrase_list = open_text_phrases('text.txt') word_length_list = [[w, len(w)] for phrase in phrase_list for w in phrase.split()] template = '{}_{}' for word in word_length_list: print(template.format(word[0], word[1])) if __name__ == '__main__': main() import re import csv def openforms(text): forms = [] text = text.lower() forms = text.split() for i in range(len(forms)): forms[i] = forms[i].strip('.,?*()«»!\'\":; ') return forms def freqlist(forms): freqs = {} for i in range(len(forms)): if forms[i] not in freqs: freqs[forms[i]] = 1 else: freqs[forms[i]] +=1 return freqs def freqlist_to_csv(freqs): with open('freq.csv', 'w', encoding='utf-8') as f: output = csv.writer(f, delimiter=',') header = ['слово', 'частота'] output.writerow(header) for key in sorted(freqs): output.writerow([key, freqs[key]]) def agosforms(text): agos = re.findall('(?:(?:[А-Яа-яіѢѣЁё])+[\s,.!\?:;"\(\)\'»\n\t—]+?){3}[А-Яа-яiѢѣ]+?аго [А-Яа-яiѢѣ]+?(?:а|и)[\s,.!\?:;"\(\)\'»\n\t—]{,5}(?:[А-Яа-яiѢѣ]+?[\s,.!\?;:—"\(\)\'»\n\t]+?){3}',text) with open('agos.txt', 'w', encoding='utf-8') as f: output = f.write('\n'.join(agos)) def main(): with open ('Лесков.txt', 'r', encoding = 'utf-8') as f: text = f.read() forms = openforms(text) print(len(forms)) 
freqs = freqlist(forms) freqlist_to_csv(freqs) agosforms(text) if __name__ == '__main__': main() import re def main(): with open('cats.txt', 'r', encoding = 'utf-8') as f: text = f.read() mark_dogs = re.sub('([Сс]обак(?:а(?:х|ми?)?|и|е|у|о(?:й|ю))?)([\s,.!\?:"\(\)\'»\n\]\[-])', '<<<тут было слово \\1>>> \\2', text) catstodogs = re.sub('([\s,.!\?:"\(\)\'«\n-])коше?к(а(?:х|ми?)?|и|е|у|о(?:й|ю))?([\s,.!\?:"\(\)\'»\n-\]\[])', '\\1собак\\2\\3', mark_dogs) CatstoDogs = re.sub('([\s,.!\?:"\(\)\'«\n-])Коше?к(а(?:х|ми?)?|и|е|у|о(?:й|ю))?([\s,.!\?:"\(\)\'»\n-\]\[])', '\\1Собак\\2\\3', catstodogs) dogstocats = re.sub('<<<тут было слово собак(а(?:х|ми?)?|и|е|у|о(?:й|ю))>>>', 'кошк\\1', CatstoDogs) dogstocats2 = re.sub('<<<тут было слово собак>>>', 'кошек', dogstocats) DogstoCats = re.sub('<<<тут было слово Собак(а(?:х|ми?)?|и|е|у|о(?:й|ю))>>>', 'Кошк\\1', dogstocats2) DogstoCats2 = re.sub('<<<тут было слово Собак>>>', 'Кошек', DogstoCats) catishtodogish = re.sub('кошач(ь(?:и(?:ми?|х)?|е(?:му|го|й)|я|ю)?|ий)', 'собач\\1', DogstoCats2) CatishtoDogish = re.sub('Кошач(ь(?:и(?:ми?|х)?|е(?:му|го|й)|я|ю)?|ий)', 'Собач\\1', catishtodogish) kittenstopyppies = re.sub('котята','щенята', CatishtoDogish) KittenstoPyppies = re.sub('Котята','Щенята', kittenstopyppies) kittentopyppy = re.sub('кот(?:е|ё)н(ок|ку)','щен\\1', KittenstoPyppies) KittentoPyppy = re.sub('Кот(?:е|ё)н(ок|ку)','Щен\\1', kittentopyppy) print(KittentoPyppy) if __name__ == '__main__': main() import re def main(): with open('dates.txt', 'r', encoding = 'utf-8') as f: text = f.read() dates = re.findall('(?:(?:0|1|2)|3(?:0|1))[0-9]\.(?:0|1(?:1|2)?)[0-9]\.[0-9]{2}', text) print(*dates) if __name__ == '__main__': main() import re def main(): with open('aphasy.txt', 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() clear = re.sub('(\w+)(?:,?|\.*?) \\1', '\\1', text) while re.sub('(\w+)(?:,?|\.*?) \\1', '\\1', clear) != clear: clear = re.sub('(\w+)(?:,?|\.*?) 
\\1', '\\1', clear) print(clear) if __name__ == '__main__': main() import re def three_consonants(text): cons3 = re.findall('[^\s,.!\?:"\(\)\'«»\nйцкнгшщзхфвпрлджчсмтб]*?[йцкнгшщзхфвпрлджчсмтб]{3}[^\s,.!\?:"\(\)\'«»\nйцкнгшщзхфвпрлджчсмтб]*?[^\s,.!\?:"\(\)\'«»\n]*?[\s,.!\?:"\(\)\'»\n]', text, re.I) for i in range(len(cons3)): cons3[i] = cons3[i].strip('\s,.!\?:"\(\)\'»\n\t ') return cons3 def startwith(text): abcs = re.findall(r'\b(?:а|о)(?:б|в).+?[\s,.!\?:"\(\)\'»\n]', text, re.I) for i in range(len(abcs)): abcs[i] = abcs[i].strip('\s,.!\?:"\(\)\'»\n\t ') return abcs def proper_nouns(text): proper = re.findall('[а-яёa-z0-9] [А-ЯЁA-Z][а-яёa-z]+?[\s,.!\?:"\(\)\'»\n]' , text) for i in range(len(proper)): proper[i] = proper[i].split()[1] proper[i] = proper[i].strip('\s,.!\?:"\(\)\'»\n\t ') return proper def analytical_future(text): future = re.findall('буд(?:е(?:шь|те?|м)|ут?) .+?(?:а|е|и)ть(?:ся)?', text, re.I) return future def polysyllabic(text): poly = re.findall(r'\b(?:[йцкнгшщзхфвпрлджчсмтб]*?[уеыаоюяиэ]){5,}[а-я]*?[\s,.!\?:"\(\)\'»\n]', text) for i in range(len(poly)): poly[i] = poly[i].strip('\s,.!\?:"\(\)\'«»\n\t ') return poly def roman_num(text): rawroman = re.findall('\sC?M*?C?D?L?C{,4}X?L?I?X{,4}I?V?I{,4}\s', text) roman = [] for i in range(len(rawroman)): rawroman[i] = rawroman[i].strip('\s,.!\?:"\(\)\'«»\n\t ') if rawroman[i]: roman.append(rawroman[i]) return roman def main(): with open('text.txt', 'r', encoding = 'utf-8') as f: text = f.read() if __name__ == '__main__': main() import re def clean(html): noscript = re.sub(']*?>[^<>]*?', '', html) nostyle = re.sub(']*?>[^<>]*?', '', noscript) nospan = re.sub(']*?>[^<>]*?', '', nostyle) notags = re.sub('<[^>]*>', '', nospan) notags1 = re.sub('{[^}]*}', '', notags) text = re.sub('[&][^;]*;', ' ', notags1) text = re.sub(r'\s+', ' ', text) return text def html(text): tags = re.findall(r'<[^>]*?>', text) return tags def main(): with open('schizo.txt', 'r', encoding = 'utf-8') as f: text = f.read() with 
open('html.txt', 'w', encoding = 'utf-8') as f: output = f.write('\n'.join(html(text))) with open('pure.txt', 'w', encoding = 'utf-8') as f: output = f.write(clean(text)) if __name__ == '__main__': main() import re def main(): given = input('Введите свой телефонный номер: ') right = re.search('\+7 \([0-9]{3}\) [0-9]{3}-[0-9]{2}-[0-9]{2}', given) if right: print('Введённый номер совпадает с шаблоном +7 (ХХХ) ХХХ-ХХ-ХХ.') if re.search('\(9(?:2|3)', given): print('Это Мегафон.') elif re.search('\(9(?:1|8)', given): print('Это МТС.') elif re.search('\(96', given): print('Это Билайн.') else: print('Я не могу точно сказать, какой это оператор.') else: print('Введённый номер не совпадает с шаблоном +7 (ХХХ) ХХХ-ХХ-ХХ.') if re.search('\(9(?:2|3)', given) or re.search('\+7 ?9(?:2|3)', given) or re.match('8 ?9(?:2|3)', given): print('Это Мегафон.') elif re.search('\(9(?:1|8)', given) or re.search('\+7 ?9(?:1|8)', given) or re.match('8 ?9(?:1|8)', given): print('Это МТС.') elif re.search('\(96', given) or re.search('\+7 ?96', given) or re.match('8 ?96', given): print('Это Билайн.') else: print('Я не могу точно сказать, какой это оператор.') if __name__ == '__main__': main() import re def revert(dictionary): reverted = {} for key in dictionary: reverted[dictionary[key]] = key return reverted def russian_to_latin_dictionary(lines): raw = {} rus_to_lat = {} for i in range(len(lines)): raw[lines[i].split(' — ')[0]] = lines[i].split(' — ')[1].strip('\n') raw = revert(raw) for key in raw: if len(key.split(',')) > 1: for i in range(len(key.split(','))): rus_to_lat[key.split(',')[i-1].strip()] = raw[key] i +=10 else: rus_to_lat[key] = raw[key] return rus_to_lat def latin_to_russian_dictionary(lines): raw = {} lat_to_rus = {} for i in range(len(lines)): raw[lines[i].split(' — ')[0]] = lines[i].split(' — ')[1].strip('\n') for key in raw: if len(key.split(',')) > 1: for i in range(len(key.split(','))): lat_to_rus[key.split(',')[i-1].strip()] = raw[key] i +=10 else: lat_to_rus[key] = 
raw[key] return lat_to_rus def main(): with open ('latin.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for i in range(len(lines)): lines[i] = re.sub('(?:–|−|-)', '—', lines[i]) lines[i] = re.sub(';', ',', lines[i]) print(latin_to_russian_dictionary(lines)) print(russian_to_latin_dictionary(lines)) if __name__ == '__main__': main() def process(fname): with open (fname, 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() forms = text.split() for i in range(len(forms)): forms[i] = forms[i].strip('.,!?*()«»\'":][><') return forms def freqlist(forms): freqs = {} for i in range(len(forms)): if forms[i] not in freqs: freqs[forms[i]] = 1 else: freqs[forms[i]] +=1 return freqs def maxfreq(frequencies): maximumfreq = [] for key in frequencies: if frequencies[key] == max(frequencies.values()): maximumfreq.append(key) return maximumfreq def averagefreq(frequencies): total = 0 for key in frequencies: total += frequencies[key] average = total/len(frequencies) return average def main(): forms = process('text.txt') frequencies = freqlist(forms) print(*maxfreq(frequencies), '- самое частотное слово в тексте.') print(averagefreq(frequencies), '- средняя частота слов в тексте.') if __name__ == '__main__': main() import random def ask(): user_info = [] name = input('Как Вас зовут? ') surname = input('Какая у Вас фамилия? ') age = input('Сколько Вам лет? ') food = input('Какая у Вас любимая еда? ') musician = input('Какая у Вас любимая музыкальная группа? ') dream = input('Какая у Вас заветная мечта? 
') user_info.append(name+' '+surname) user_info.append([age, food, musician, dream]) return user_info def guess(database_dictionary): person = random.choice(list(database_dictionary.keys())) clue = random.choice(['его/её мечта: '+database_dictionary[person][3], 'его/её любимая музыкальная группа: '+database_dictionary[person][2], 'его/её мечта: '+database_dictionary[person][3]+'\nего/её любимая еда: '+database_dictionary[person][1], 'его/её любимая музыкальная группа: '+database_dictionary[person][2]+'\nего/её любимая еда: '+database_dictionary[person][1]]) guess = input('Угадайте, кто это (имя и фамилию)? Подсказка: '+clue+' ') if guess == person: return 'Правильно!' else: return 'Нет, неправильно, это - '+person def main(): database = {} i = 0 while i < 7: answer = ask() i += 1 database[answer[0]] = answer[1] print(guess(database)) if __name__ == '__main__': main() import re def main(): with open('Лингвистика.txt', 'r', encoding = 'utf-8') as f: text = f.read() lang = 'язык((?:а(?:ми?|х)?)|и|о(?:в|м)|у|е)?([\s,.!\?:"\(\)\'»])' Lang = 'Язык((?:а(?:ми?|х)?)|и|о(?:в|м)|у|е)?([\s,.!\?:"\(\)\'»])' new_text = re.sub(lang,'шашлык\\1\\2', text) new_text = re.sub(Lang,'Шашлык\\1\\2', new_text) with open('Новая лингвистика.txt', 'w', encoding = 'utf-8') as f: f.write(new_text) if __name__ == '__main__': main() import os extension_frequency_list = {} for root, dirs, files in os.walk('.'): for f in files: file_name = f.split('.')[0] file_ext = f.split('.')[1] if file_ext not in extension_frequency_list: extension_frequency_list[file_ext] = 1 else: extension_frequency_list[file_ext] += 1 max_ext = max(extension_frequency_list.values()) i = 0 for key in extension_frequency_list: if extension_frequency_list[key] == max_ext: if i == 0: print('The most frequent extention is \''+key+'\'. 
There is(are) '+str(extension_frequency_list[key])+' file(s) with it.') i = 1 else: print('There is(are) also '+str(extension_frequency_list[key])+' \''+key+'\' file(s).') def opentext(fname): forms = [] with open (fname, 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() forms = text.split() for i in range(len(forms)): forms[i] = forms[i].strip('.,!?*()«»\'"') return forms def adj_hood(fname): words = opentext(fname) hoods = [] for i in range(len(words)): if len(words[i])>4: if words[i][-1] == 'd': if words[i][-2] == 'o': if words[i][-3] == 'o': if words[i][-4] == 'h': if words[i] not in hoods: hoods.append(words[i]) return hoods def count_frequency(fname, word): words = opentext(fname) word_freq = 0 for i in range(len(words)): if words[i] == word: word_freq += 1 return word_freq def main(): fname = input('Введите имя файла: ') hoods = adj_hood(fname) print('В тексте встретилось', len(hoods), 'прилагательных с суффиксом -hood.') freq = [] for i in range(len(hoods)): freq.append(count_frequency(fname, hoods[i])) min_freq = [] for i in range(len(hoods)): if freq[i] == min(freq): min_freq.append(hoods[i]) print('Самые редкие прилагательные с суффиксом -hood: ', ', '.join(min_freq)) roots = [] for i in range(len(hoods)): roots.append(hoods[i][0:-4]) print('Корни прилагательных с суффиксом -hood: ', ', '.join(roots)) if __name__ == '__main__': main() words = [] with open('text.txt','r', encoding = 'utf-8') as f: text = f.read() words_raw = text.split() words = [] for i in range(len(words_raw)): words.extend(words_raw[i].split('\n')) len3 = 0 len1 = 0 for word in words: if len(word) == 3: len3 += 1 elif len(word) == 1: len1 += 1 if len1 == 0: print('В файле нет слов длины 1.') elif len3 == 0: print('В файле нет слов длины 3.') else: print('В файле в '+str(len3/len1)+' раз больше слов длины 3, чем слов длины 1.') import random def nom_noun(): with open('nomnouns.txt','r', encoding = 'utf-8') as f: nomnouns = f.read().split('\n') return 
random.choice(nomnouns) def acc_noun(): with open('accnouns.txt','r', encoding = 'utf-8') as f: accnouns = f.read().split('\n') return random.choice(accnouns) def adverb(): with open('adverbs.txt','r', encoding = 'utf-8') as f: adverbs = f.read().split('\n') return random.choice(adverbs) def intensifier(adv): with open('intensifiers.txt','r', encoding = 'utf-8') as f: intensifiers = f.read().split('\n') return random.choice(intensifiers) + ' ' + adv def verb_of_thought(subj): with open('thoughtverbs.txt','r', encoding = 'utf-8') as f: thoughtverbs = f.read().split('\n') return subj + ' ' + random.choice(thoughtverbs) + ', что ' + trans_verb(nom_noun(), acc_noun()) + '.' def trans_verb(subj, obj): with open('transverbs.txt','r', encoding = 'utf-8') as f: transverbs = f.read().split('\n') return subj + ' ' + intensifier(adverb()) + ' ' + random.choice(transverbs)+ ' ' + obj def trans_verb_negative(subj, obj): with open('transverbs.txt','r', encoding = 'utf-8') as f: transverbs = f.read().split('\n') negative_sentences = [subj + ' не ' + intensifier(adverb()) + ' ' + random.choice(transverbs)+ ' ' + obj, subj + ' ' + intensifier(adverb()) + ' не ' + random.choice(transverbs)+ ' ' + obj] return random.choice(negative_sentences) def verb_of_thought_negative(subj, obj): with open('thoughtverbs.txt','r', encoding = 'utf-8') as f: thoughtverbs = f.read().split('\n') return subj + ' не ' + random.choice(thoughtverbs) + ', что ' + trans_verb(nom_noun(), acc_noun()) + '.' 
def positive(): positive_sentences = [trans_verb(nom_noun(), acc_noun()) + '.', verb_of_thought(nom_noun())] return random.choice(positive_sentences) def question(): questions = ['зачем ' + trans_verb(nom_noun(), acc_noun()) + '?', 'почему ' + verb_of_thought(nom_noun())] return random.choice(questions) def negative(): negative_sentences = [verb_of_thought_negative(nom_noun(), acc_noun()), trans_verb_negative(nom_noun(), acc_noun())] return random.choice(negative_sentences) def conditional(): with open('transverbs.txt','r', encoding = 'utf-8') as f: transverbs = f.read().split('\n') conditional_sentences = ['если ' + positive().strip('.') + ', то ' + nom_noun() + ' ' + random.choice(transverbs)+ ' ' + acc_noun(), 'если ' + positive().strip('.') + ', то ' + nom_noun() + ' не ' + random.choice(transverbs)+ ' ' + acc_noun(), 'если ' + negative().strip('.') + ', то ' + nom_noun() + ' ' + random.choice(transverbs)+ ' ' + acc_noun(), 'если ' + negative().strip('.') + ', то ' + nom_noun() + ' не ' + random.choice(transverbs)+ ' ' + acc_noun()] return random.choice(conditional_sentences) def imperative(): with open('imperatives.txt','r', encoding = 'utf-8') as f: imperatives = f.read().split('\n') imperative_sentences = ['пусть ' + positive(), 'пусть ' + negative(), 'пусть ' + conditional(), random.choice(imperatives) + ' ' + acc_noun()] return random.choice(imperative_sentences) def main(): sentences = [positive(), question(), negative(), conditional(), imperative()] random.shuffle(sentences) for i in range(5): print(sentences[i].capitalize()) if __name__ == '__main__': main() a = input("Введите первое число. ") a = float(a) b = input("Введите второе число. ") b = float(b) c = input("Введите третье число. 
") c = float(c) if b: if a%b == c: print("a даёт остаток c при делении на b") else: print("a не даёт остаток c при делении на b") else: print("Делите на ноль сами!") if (a*c)+b == 0: print("c является решением линейного уравнения ax + b = 0") else: print("c не является решением линейного уравнения ax + b = 0") list_list = [['l'],['s'],['d']] def el_0(any_list): return any_list[0] a = ' '.join([el_0(el) for el in list_list]) print(a) b = list(map(el_0, list_list)) print(b) b = ' '.join(list(map(el_0, list_list))) print(b) c = ' '.join(list(map(lambda any_list: any_list[0], list_list))) print(c) number = input("print any number") square = int(number)**0.5 if square/1 == square//1: print ("yes") else: print ("no") import codecs, re def open_file(title): a = codecs.open(title, 'r', 'utf-8') words = [word.strip(' ,.?!-:;').lower() for word in a.read().split()] return words def find_bigramm(words): text = '' for word in words: text += word + ' ' found = 0 for x in range(len(words)-1): bigramm = words[x] + ' ' + words[x+1] m = re.findall(bigramm, text, flags = re.U) if len(m) > 2: print(True) found = 1 break if found == 0: print(False) def main(): f = open_file('text.txt') z = find_bigramm(f) if __name__ == "__main__": main() import os, codecs def open_file(title): a = codecs.open(title, 'r', 'utf-8') words = [word.strip(' ,.?!-:;').lower() for word in a.read().split()] return words def count_word_frequency(words): freq_dict = {} for word in words: try: freq_dict[word] += 1 except KeyError: freq_dict[word] = 1 return freq_dict def find_max_keys(dict_num_values, amount): values_list = dict_num_values.values() max_values = [] i = 0 while i < amount: local_max = max(values_list) max_values.append(local_max) if local_max != 1: values_list = [x for x in values_list if x != local_max] i += 1 max_keys = [] for key in dict_num_values: if dict_num_values[key] in max_values: max_keys.append(key) return max_keys def extract_words_from_txt_in_folder(path): words = [] for root, dirs, 
files in os.walk(path): for f in files: if len(f.split('.')) == 2: file_name = f.split('.')[0] file_ext = f.split('.')[1] if file_ext == 'txt': words += open_file(os.path.join(root, f)) return words def main(): print(find_max_keys(count_word_frequency(extract_words_from_txt_in_folder('.')),10)) if __name__ == "__main__": main() import random with open('words.txt','r', encoding = 'utf-8') as f: lines = f.readlines() lenlines = len(lines) random.shuffle(lines) score = 0 for line in lines: line = line.strip() word, hint = line.split(' ',1) response = input('Какое слово я загадала?\n'+ 'Подсказка: '+hint+' ') if response == word: print('Правильно!') score += 1 else: print('Нет, слово было', word) with open('scores.txt', 'w', encoding = 'utf-8') as n: percent = score/lenlines*100 n.write('Вот результат: ') n.write(str(percent)+'%') import codecs def open_file(file_name): f = codecs.open(file_name, 'r', 'utf-8') words = [] for line in f: line = line.strip() words += line.split() for word in words: word = word.strip('.,!?:;()\'\"1234567890') word = word.lower() return words def bigramms(words): bi = create_list(words) dic = {} for j in bi: if j not in dic: dic[j] = 1 else: dic[j] += 1 answer = '' answer = [n+'\r\n' for n in dic] for key in dic: if dic[key] > 2: answer = True else: answer = False print(answer) return answer def create_list(words): bi = [] for i in range(len(words)): if i < len(words) - 1: j = i+1 bi.append(words[i] + ' ' + words[j]) return bi words = open_file('text.txt') bigramms(words) import re def main(): with open ('china space programm.txt', 'r', encoding = 'utf-8') as f: text = f.read() reg = '«[А-ЯЁа-яё]+?-[1-9]+»' all_matches = re.findall(reg, text) pure_names =[] for i in range(len(all_matches)): if re.sub(r'-[1-9]+', '', all_matches[i]) not in pure_names: pure_names.append(re.sub(r'-[1-9]+', '', all_matches[i])) all_matches += pure_names print(all_matches) if __name__ == '__main__': main() def opentext(fname): forms = [] with open (fname, 'r', 
encoding = 'utf-8') as f: text = f.read() text = text.lower() forms = text.split() for i in range(len(forms)): forms[i] = forms[i].strip('.,?*()«»') return forms def first_letter(letter, fname = 'text.txt'): starting_with_letter = [] forms = opentext(fname) for i in range(len(forms)): if forms[i][0] == letter: starting_with_letter.append(forms[i]) return starting_with_letter def questions(): fname = input('Введите имя файла: ') letter = input('Введите букву: ') number = int(input('Введите целое число: ')) starting_with_letter = first_letter(letter, fname) answer = [] for i in range(len(starting_with_letter)): if len(starting_with_letter[i]) > number: answer.append(starting_with_letter[i]) return answer def adjectives(fname): forms = opentext(fname) adj = [] for i in range(len(forms)): if len(forms[i]) > 2: if forms[i][-1] == 'й': if forms[i][-2] == 'o' or forms[i][-2] == 'ы' or forms[i][-2] == 'и': if i != len(forms)-1: adj.append(forms[i]+' '+forms[i+1]) else: adj.append(forms[i]) elif forms[i][-1] == 'я': if forms[i][-2] == 'а' or forms[i][-2] == 'я': if i != len(forms)-1: adj.append(forms[i]+' '+forms[i+1]) else: adj.append(forms[i]) elif forms[i][-1] == 'е': if forms[i][-2] == 'o' or forms[i][-2] == 'е': if i != len(forms)-1: adj.append(forms[i]+' '+forms[i+1]) else: adj.append(forms[i]) return adj with open('information.txt', 'w', encoding = 'utf-8') as n: name = input('Как Вас зовут? ') n.write(name+'\n') age = input('Сколько Вам лет? ') n.write(str(age)+'\n') color = input('Какой у Вас любимый цвет? ') n.write(color+'\n') musician = input('Какой у Вас любимый исполнитель? ') n.write(musician+'\n') dream = input('Какая у Вас мечта? ') n.write(dream+'\n') with open('information about Mary.txt','r', encoding = 'utf-8') as f: info = f.readlines() for line in range(len(info)): info[line] = info[line].strip() response = input('Как Вашего соседа зовут? 
') if response == info[0]: print('Правильно!') else: print('Нет, его зовут '+info[0]+'.') response = input('Сколько Вашему соседу лет? ') if str(response) == info[1]: print('Правильно!') else: print('Нет, ему '+info[1]+' лет.') response = input('Какой у Вашего соседа любимый цвет?') if response == info[2]: print('Правильно!') else: print('Нет, его любимый цвет - '+info[2]+'.') response = input('Какой у Вашего соседа любимый исполнитель?') if response == info[3]: print('Правильно!') else: print('Нет, его любимый исполнитель - '+info[3]+'.') response = input('Какая у Вашего соседа мечта?') if response == info[4]: print('Правильно!') else: print('Нет, его мечта - '+info[4]+'.') number = input("What's your phone number") if '+1' in number or number.endswith("2"): pass elif '+7' in number or number.startswith('8'): print("Как дела в России?") elif '+4' in number: print("Как дела в Англии?") else: print("Как дела в мире?") import os, codecs from math import log def preprocess(text): punct = '[.,!«»?&@"$\[\]\(\):;% tabs = '\t\n' text_wo_punct = re.sub(punct, '', text.lower()) text_wo_punct = re.sub(tabs, '', text_wo_punct) words = text_wo_punct.strip().split() return words def count_tf(word, text): n = text.count(word) return n/len(text) def count_df(word, texts): i = [True for text in texts if word in text] df = len(i) return df def count_idf(word, texts): df = count_df(word, texts) try: idf = len(texts)/df except ZeroDivisionError: return 0 return idf def count_tfidf(word, text, texts): tf = count_tf(word, text) idf = count_idf(word, texts) tfidf = log(tf, 10) * log(idf, 10) return tfidf def extract_textS_from_folder(path): texts = [] for root, dirs, files in os.walk(path): for f in files: with open(os.path.join(root, f) , "r", encoding = 'utf-8') as t: content = t.read text = preprocess(content) texts.append(text) return texts def keywords(text, texts): keywords = {} dic_tfidf = {} for word in text: if word in dic_tfidf: continue tfidf = count_tfidf(word, text, texts) 
dic_tfidf[word] = tfidf i = 0 for el in sorted(dic_tfidf, key = lambda x: dic_tfidf(x)): if i > 5: break else: i += 1 keywords[el] = dic_tfidf[el] return keywords def main(): texts = extract_text_from_folder('wikipedia') for t in texts: kwords = keywords(t, texts) for key in kwords: print(key, kwords[key]) if __name__ == "__main__": main() import os import re from math import log def preprocessing(text): punct = '[.,_!«»?&@"$\/\\[\]\(\):;% tabs = '\n\t\s' num = '[0-9]' text_wo_punct = re.sub(punct, '', text.lower()) text_wo_punct = re.sub(tabs, ' ', text_wo_punct) text_wo_punct = re.sub(num, '', text_wo_punct) words = text_wo_punct.strip().split() return words def count_tf(word, text): i = 0 for w in text: if w == word: i += 1 tf = i / len(text) return tf def count_df(word, texts): i = 0 i = [1 for text in texts if word in text] df = sum(i) return df def count_idf(word, texts): df = count_df(word, texts) idf = len(texts)/ (1 + df) return idf def count_tfidf(word, text, texts): tf = count_tf(word, text) idf = count_idf(word, texts) tfidf = log(tf, 10) * log(idf, 10) return tfidf def keywords(text, texts): dic_tfidf = {} kwords = {} for word in text: if word in dic_tfidf: continue tfidf = count_tfidf(word, text, texts) dic_tfidf[word] = tfidf i = 0 for el in sorted(dic_tfidf, key=lambda x: dic_tfidf[x]): if i > 5: break else: i += 1 kwords[el] = dic_tfidf[el] return kwords def main(): texts = {} for root, dirs, files in os.walk('wikipedia'): for f in files: with open(os.path.join(root, f), 'r', encoding='utf-8') as t: content = t.read() text = preprocessing(content) texts[f] = text raw_texts = list(texts.values()) for t in texts: print('\nИзвлекаем ключевые слова для текста "{}"'.format(t.split('.')[0])) kwords = keywords(texts[t], raw_texts) for key in kwords: print (key, kwords[key]) if __name__ == '__main__': main() import re import os from math import log def open_words(fname): forms = [] with open (fname, 'r', encoding = 'utf-8') as f: text = f.read() text = 
text.lower() punct = '[.,?!|:;*№\"\'—@ text = re.sub(punct, '', text) text = re.sub('[\n\t]', ' ', text) forms = text.split() return forms def make_freq(arr): d = {} for el in arr: try: d[el] += 1 except KeyError: d[el] = 1 return d def make_bigrams(arr): bigrams = [] for i in range(len(arr)-1): bigr = arr[i] + ' ' + arr[i+1] bigrams.append(bigr) return bigrams def count_pmi(x, y): try: p_x = word_freq[x]/len(words) except KeyError: p_x = 0 try: p_y = word_freq[x]/len(words) except KeyError: p_y = 0 try: bigr = x + ' ' + y p_xy = bigrams_freq[bigr]/len(bigrams) except KeyError: p_xy = 0 try: pmi = log(p_xy/(p_x*p_y)) except ZeroDivisionError: pmi = 0 return pmi def calculate_pmi(): pmis = {} for bigr in bigrams: x, y = bigr.split() pmi = count_pmi(x, y) pmis[bigr] = pmi return pmis def calculate_pmi_cats(word, cathegory): p_word = freq_all[word]/len(words_all) p_cat = 1/3 if cathegory == 'anek': d = freq_anek w = len(corpus_anek_words) elif cathegory == 'izvest': d = freq_izvest w = len(corpus_izvest_words) elif cathegory == 'teh': d = freq_teh w = len(corpus_teh_words) p_word_cat = d[word]/w pmi = log(p_word_cat/(p_word*p_cat)) return pmi def main(): corpus_anek_words = [] corpus_izvest_words = [] corpus_teh_words = [] for root, dirs, files in os.walk('texts'): if 'anekdots' in root: for f in files: corpus_anek_words += open_words(os.path.join(root, f)) if 'teh_mol' in root: for f in files: corpus_teh_words += open_words(os.path.join(root, f)) if 'izvest' in root: for f in files: corpus_izvest_words += open_words(os.path.join(root, f)) words = corpus_anek_words + corpus_teh_words + corpus_izvest_words freq_anek = make_freq(corpus_anek_words) freq_izvest = make_freq(corpus_izvest_words) freq_teh = make_freq(corpus_teh_words) freq_all = make_freq(words) words_cathegory_dict = {} for w in words: i = 0 try: if i < 100: pmi_anek = calculate_pmi_cats(w, 'anek') pmi_cats(w, 'anek') pmi_izvest = calculate_pmi_cats(w, 'izvest') pmi_teh = calculate_pmi_cats(w, 'teh') 
pmi_max = max(pmi_anek, pmi_izvest, pmi_teh) if pmi_max == pmi_anek: words_cathegory_dict[w] = 'anek' if pmi_max == pmi_teh: words_cathegory_dict[w] = 'teh' if pmi_max == pmi_anek: words_cathegory_dict[w] = 'teh' i += 1 except KeyError: pass print(words_cathegory_dict) if __name__ == '__main__': main() import shutil import os name = input('Print any sentence. ') words = name.split() path = words[0] for i in range(1, len(words)): path = os.path.join(path, words[i]) os.makedirs(path) import shutil import os num = int(input('Print any natural number. ')) for i in range(num): name = str(i+1) os.makedirs(name) for a in range(i+1): filename = os.path.join(name,str(a+1)+'.txt') with open(filename, 'w', encoding = 'utf-8') as f: f.write('') import os import shutil filelist = [f for f in os.listdir() if os.path.isfile(f)] print(filelist) import os import shutil path = os.path.abspath('.') path2 = os.getcwd() universalpath = os.path.join('texts', '1.txt') exists = os.path.exists('texts\1.txt') exists2 = os.path.exists(os.path.join('texts', '1.txt')) filelist = os.listdir(r'C:\My\HSE\programming\HSE_programming\HSE_programming\CWs\CW13\texts') s = 'Hello! 
' i = 1 for f in filelist: if f.endswith('.txt'): with open(f, 'a', encoding = 'utf-8') as w: w.write(s*1) i += 1 texts = [f for f in os.listdir(r'C:\My\HSE\programming\HSE_programming\HSE_programming\CWs\CW13\texts') if f.endswith('.txt')] if not os.path.exists('ab'): os.mkdir('ab') if not os.path.exists(r'a\long\long\long\long\path'): os.makedirs(r'a\long\long\long\long\path') if os.path.exists('ab') and not os.path.exists('abc'): os.rename('ab', 'abc') if os.path.exists(r'a\long\long\long') and not os.path.exists(r'a\long\long\longer'): os.rename(r'a\long\long\long', r'a\long\long\longer') isfile = os.path.isfile(r'texts\1.txt') isdir = os.path.isdir(r'a\long\long') print(os.listdir()) shutil.copy(r'texts\1.txt', r'newcorpus') shutil.copytree(r'texts', r'corpus') shutil.move(r'texts\2.txt', r'newcorpus') os.remove(r'corpus\2.txt') shutil.rmtree('newcorpus') shutil.rmtree('a') shutil.rmtree('abc') import os import shutil directory = input('Print any path working in your OS. ') if os.path.exists(directory): filelist = [f for f in os.listdir() if os.path.isfile(f)] extlist = [] for f in filelist: ext = f.split('.')[1] if ext not in extlist: extlist.append(ext) extdict = {} for ext in extlist: for f in filelist: if f.endswith(ext): if ext not in extdict: extdict[ext] = 1 else: extdict[ext] +=1 else: directory = os.getcwd() print(extdict) import re def main(): with open ('hse.html', 'r', encoding = 'utf-8') as f: text = f.read() card_reg = '
(\2-\d))' def find(): with open('lang.html', 'r', encoding = 'utf-8') as f: content=f.read() isos=re.findall(reg, content) return isos def save(): isos=find() with open('isos.txt', 'w', encoding = 'utf-8') as n: for iso in isos: n.write(iso[2] + '\n') save() import os import re def maxfiles (): numfiles = 0 name = '' for root, dirs, files in os.walk('.'): if len (files) > numfiles: numfiles = len (files) name = re.sub(r'.*/', '', root) print ('Больше всего файлов в папке:', name) maxfiles () import os import re allobj = os.listdir('.') lat = r'[A-Za-z]' kir = r'[А-Яа-я]' folders = [] fold_new = [] folds = allobj def fold_num(): for f in folds: if os.path.isfile(f) == True: folds.remove(f) for fl in folds: if re.search(lat, fl) != None and re.search(kir, fl) != None: folders.append(fl) print ('папок, название которых содержит и кириллические, и латинские символы:', len(folders)) def norepeat(): for obj in allobj: index = obj.rfind('.') if index != -1: obj = obj[:index] if fold_new.count(obj) == 0: fold_new.append(obj) print (obj) fold_num() norepeat() import re def text(): with open('text.txt', 'r', encoding = 'utf-8') as f: f = f.read() f=f.lower() sens = re.split('[.\?!] 
',f) sens1 = [re.sub (r'[,.()?!:;-]', '', sen) for sen in sens ] sens_new = [ sen.split(' ') for sen in sens1 ] return sens_new def word_num(): sens = text() number = {word: sen.count(word) for sen in sens for word in sen} flat = [word for sen in sens for word in sen] for word in flat: if flat.count(word)>1: flat.remove(word) for word in flat: if number[word]>1: print(word, '{:^10}'.format(number[word])) word_num() import random def opendict(): d={} with open ('dict.csv', 'r', encoding='utf-8') as f: text = f.readlines() for p in text: prs=[] pr=p.strip('\n') prs=p.split() d[prs[0]]=prs[1] return d def zag(): d=opendict() klus=[] for key in d: klus.append(key) klu=random.choice(klus) print('отгадай-ка слово:', klu,'...') return klu def good(): with open ('good.txt', 'r', encoding='utf-8') as f: text = f.readlines() well=random.choice(text) return print(well) def oops(): with open ('false.txt', 'r', encoding='utf-8') as f: text = f.readlines() false=random.choice(text) return print(false) def otvet(): klu=zag() d=opendict() slovo=input('ответ:') if slovo==d[klu]: return good() else: return oops() otvet() with open('textr.txt', 'r', encoding = 'utf-8') as t: fiveword=0 numline=0 print(t.read) for line in t: words=[] words=line.split(' ') if len(words)>5: fiveword+=1 numline+=1 print (fiveword) print ('Кол-во строк:', numline) print ('В файле '+str(int(((fiveword/numline)*100)//1))+'% строк, в которых больше пяти слов') def filename(): fname=input('Введите имя файла ') return fname def opentext (): with open (filename(), 'r', encoding='utf-8') as f: text=f.read() textl=text.lower() ws = textl.split() tekst=[] for w in ws: wstr=w.strip('!?.,:;()') tekst.append(wstr) return tekst def unws(tekst): text=tekst unws=[] for w in text: if w[0:2]=='un': unws.append(w) return unws def nunws(unws): return len(unws) def perc (num): ws=opentext() longw=[] for w in unws(ws): if len(w)>num: longw.append(w) return str(int(len(longw)/nunws(unws(ws))*100))+'%' perc(int(input('Введите 
число '))) print('Введите три числа в столбик') a = int (input()) b = int (input()) c = int (input()) if a/b==c : print ( 'условие 5 соблюдается (a/b=c)') else: print ( 'условие 5 не соблюдается (a/b≠c)') if a**b==c : print ( 'условие 6 соблюдается (a^b=c)') else: print ( 'условие 6 не соблюдается (a^b≠c)') import re pti=r'птиц' ppti=r'Пт[ии́]ц' fish=r'рыб' ffish=r'Рыб' ptiey=r'птицей' pptiey=r'Птицей' fishy=r'рыбой' ffishy=r'Рыбой' def cont(): with open('birds.html', 'r', encoding = 'utf-8') as f: content=f.read() return content def text(): birds=cont() ryba=re.sub(pti,fish, birds) birds=re.sub(ppti,ffish,ryba) ryba=re.sub(ptiey,fishy,birds) birds=re.sub(pptiey,ffishy,ryba) return birds def save(): new=text() with open('ryby.txt', 'w', encoding = 'utf-8') as n: n.write(new) return new save() file = open("your txt file here","r",encoding='utf8') arriva = 0 arr = 0 for word in file.read().split(): if len(word) >= 0: arriva += 1 if len(word) >= 10: arr += 1 print(arr/arriva*100) file.close() d = {'длинная': "дорога", 'пустая': "бутылка", 'великий': "новгород", 'белокаменная': "москва", 'пребрежный': "город", 'серая': "мышь", 'ласковый': "кот", 'певучая': "птица", 'старый': "дуб", 'дождливая': "погода"} keys_list = list(d.keys()) arr = 1 import random g = random.choice(keys_list) p = input(g + ":") se = g,p while se not in d.items(): arr += 1 p = input("попробуй ещё:") se = g,p else: print("ты угадал") print("всего попыток:", arr) import os count = 0 for root, dirs, files in os.walk('.'): for f in files: if f.split('.') not in names: count += 1 names.append(f.split('.')) print('Найдено {} файла(ов):'.format(count)) a = int(input("Введите первое число:")) b = int(input("Введите второе число:")) c = int(input("Введите третье число:")) if a * b == c: print("Умножение удалось") else: print("умножение не удалось") if a / b == c: print("Деление удалось") else: print("Деление не удалось") arr = [] word = input('latin word please:') while word: arr.append(word) word = 
input('latin word please:') for w in arr: if w.endswith('t'): print(w) import re regex = "(съе)(л|в?|сть?)(а?|и?|ш?)?(ий?|ая?|ие?)? " f = open("your file here.txt", "r" ,encoding='utf-8') j = re.findall(regex, f.read()) print(*j, sep = '\n') n = int(input("Введите число:")) for i in range(1,11): print(i*n) wordic = input('enter your word:') for index,_ in enumerate(wordic): print (" ".join(wordic[:index+1])) import random def noun(): nouns = ['собака','велосипед','коробка','радуга','телефон','тетрадь','хлеб','пирог','замок','билет','бритва','скамейка','магазин','колесо','щкафчик','тарелка'] return random.choice(nouns) def adverb(): nouns = ['жарко','холодно','больно','страшно','громко','вызывающе','немедленно','быстро','яростно','высоко','далеко','мужественно','скучно'] return random.choice(nouns) def verb(): nouns = ['горит','лежит','бежит','едет','прыгает','кушает','поет','разминается','опаздывает','тонет','сидит','идет','кидает','включает','пишет','дерется'] return random.choice(nouns) def sub_conj(): nouns = ['потому что','если','пока','когда','так что','ибо'] return random.choice(nouns) def comp_conj(): nouns = ['и','также','а','но','однако','зато'] return random.choice(nouns) def random_sentence(): sentence = noun() + ' ' + verb() + ' ' + adverb() + ' ' + (sub_conj() or comp_conj()) + ' ' + noun() + ' ' + verb() + ' ' + adverb() + '.' 
return sentence num_of_sents = random.randint(6, 20) for i in range(num_of_sents): sentence = random_sentence() sentence = sentence.capitalize() print(sentence, end=' ') def function(s): file = open(s, 'r', encoding='utf-8') words = [word for line in file for word in line.split()] return words def count(): adj = 0 for line in (function("dzzz.txt")): for word in line.split(): if word.endswith('ons'): adj += 1 return adj def average(): a = [] for line in (function("dzzz.txt")): for word in line.split(): if word.endswith('ons'): a.append(word) av = sum(len(word) for word in a)/len(a) return av print('всего прилагательных с суффиксом "-ons":', count()) print('средняя длина:', average()) import os import re total_number = 0 for m in os.listdir("C:\\Users\\Никита\\Desktop\\homework 16\\papka"): if re.findall('\w+' ' ' '\w+', m): total_number +=1 print("папок с двумя и более словами:", total_number) for m in os.listdir("C:\\Users\\Никита\\Desktop\\homework 16\\papka"): s = re.findall('\w+' ' ' '\w+', m) for e in s: print(s) counter = 0 f = open('C:\\Users\\Никита\\Desktop\\kontrosha\\kontrol.txt').read() for lines in f.split(): if '' not in lines: counter += 1 else: break print(counter) with open('C:\\Users\\Никита\\Desktop\\kontrosha\\resultat.txt', 'w', encoding='utf-8') as file: print(counter, file=file) print('результат распечатан в файл') import re def open_html(f): with open(f, 'r', encoding='utf-8') as file: content = file.read() return content content = open_html(r'C:\Users\Никита\Desktop\hw14 (families)\arbuz.html') Link = r'(.*?)' links = re.findall(Link, content) for link in links[:10]: result = link[2] tu = open('dz.txt', 'w').write((result)) file = open('dz.txt').read() print('в файле должно оказаться:', file) import re def open_html(f): with open(f, 'r', encoding='utf-8') as file: content = file.read() return content content = open_html(r'C:\Users\Никита\Desktop\hw15 cats and dinos\dinos.html') m = re.sub('динозавр', 'кот', content, flags= re.M) p = 
re.sub('Динозавр', 'Кот', m, flags= re.M) print(p) with open('result.txt', 'w', encoding='utf-8') as file: print(p, file=file) print('результат распечатан в файл') import re def text_process(text_name): f = open(text_name, 'r', encoding='utf-8') text = f.read() l = text.split() l1 = [] for word in l: l1.append(word.strip('.,;:?![]')) f.close() return l1 def regexp(text): for word in text: if re.search('.*[аеёиоуиыэюяАЕЁИОУИЫЭЮЯ].*[аеёиоуиыэюяАЕЁИОУИЫЭЮЯ].*[аеёиоуиыэюяАЕЁИОУИЫЭЮЯ]' , word): print(word) def main(): text = text_process('text.txt') regexp(text) main() word = input() while len(word) > 0: print('Nominative singular') print('Accusative singular') if word.endswith('а') or word.endswith('я'): print('Genitive singular') print('Accusative singular') print('Nominative plural') if word.endswith('у') or word.endswith('ю'): print('Dative singular') if word.endswith('ом') or word.endswith('ем'): print('Instrumentalis singular') if word.endswith('е'): print('Prepositive singular') if word.endswith('ы'): print('Nominative plural') print('Accusative plural') if word.endswith('ов') or word.endswith('ев') or word.endswith('ой') or word.endswith('ей'): print('Genitive plural') print('Accusative plural') if word.endswith('ам') or word.endswith('ям'): print('Dative plural') if word.endswith('ами') or word.endswith('ями'): print('Instrumentalis plural') if word.endswith('ах') or word.endswith('ях'): print('Prepositive plural') word = input() def corpus_process(text_name): f = open(text_name, 'r') text = f.read() l = [] for line in text: l.append(line) f.close() return l def count_frequency(word, text): n = 0 for s in text: if word in s: n += 1 return n def lemma_dict(text): dictionary = {} for s in text: if '', '', text_str) text = re.sub('\n', '', text) text_arr = text.split(' ') return text_arr def find_ins(text_lines): words_ins = [] for line in text_lines: if '=ins' in line: word = re.match('(.*?)<', line).group(1) words_ins.append(word) return words_ins def 
make_string(words_ins, text_arr): line_arr = [] text_arr_e = [i.strip('&.,?:"«»;!()') for i in text_arr] for word in words_ins: n = text_arr_e.index(word) left_context = [] for i in range(n-3, n-1): try: left_context.append(text_arr[i]) except Exception: continue right_context = [] for i in range(n+1, n+3): try: right_context.append(text_arr[i]) except Exception: break line = ' '.join(left_context)+'\t'+word+'\t'+' '.join(right_context) line_arr.append(line) string = '\n'.join(line_arr) return string def main(): n = (count_ana_word(open_xml())) print(n) gr_dict = find_lex(open_xml_as_string()) array = [i+'\t'+gr_dict[i] for i in gr_dict.keys()] write('\n'.join(array), 'frq_gr.txt') write((make_string(find_ins(open_xml()), get_text(open_xml_as_string()))), 'words_ins.txt') if __name__ == '__main__': main() def text_process(text_name): f = open(text_name, 'r', encoding='utf-8') text = f.read() l = text.split() l1 = [] for word in l: l1.append(word.strip('.,;:?!')) f.close() return l1 def count(text, letter, letter2): n = 0 for word in text: if word.startswith(letter) or word.startswith(letter2): n += 1 return n def letters_dict(text): dictionary = {} alphabet = 'абвгдеёжзийклмнопрстуфхцчшщьыъэюя' ALPHABET = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ' for letter in alphabet: letter2 = ALPHABET[alphabet.find(letter)] dictionary[letter] = count(text, letter, letter2) def main(): text = text_process('text.txt') dictionary = letters_dict(text) text2 = open('text2.tsv', 'w') for letter in dictionary: text2.write(letter + '\t' + str(dictionary[letter])) text2.close() main() import os def remove_tree(folder): for root, dirs, files in os.walk(folder): for f in files: os.remove(os.path.join(root, f)) for d in dirs: os.rmdir(os.path.join(root, d)) os.rmdir(folder) remove_tree('folder') import csv def text_process(text_name): f = open(text_name, 'r', encoding='utf-8') text = f.read() l = text.split() l1 = [] for word in l: l1.append(word.strip('.,;:?!')) f.close() return l1 def 
freq_dict(text): dictionary = {} for word in text: freq = 0 for i in text: if i == word: freq += 1 dictionary[word] = freq return sorted(dictionary) def main(): text = text_process('text.txt') dictionary = freq_dict(text) text2 = open('text2.tsv', 'w') for word in dictionary: text2.write(word + '\t' + str(dictionary[word])) text2.close() main() import os def new_sentence(sentence): sentence1 = '' for word in sentence: sentence1 += word.strip('.,;:?!') + ' ' sentence1 += '.' return sentence1 def text_process(text_name): f = open(text_name, 'r', encoding='utf-8') text = f.read() text = text.replace('!','.') text = text.replace('?', '.') text = text.replace('...','.') l = text.split(.) l1 = [new_sentence(sentence) for sentence in text] f.close() return l1 def count_longest(text): n = 0 longest = [] for sent in text: sent1 = sent.split(' ') if len(sent1) > n: longest = sent1 n = len(sent1) return longest def create_folders(sent): sent = '/'.join(sent) os.makedirs(sent) def main(): text = text_process('text.txt'): sent = count_longest(text) create_folders(sent) main() import random number = random.randint(0,9) guess = int(input()) while guess != number: print('No') if guess < number: print ('больше') else: print ('меньше') guess = int(input()) print('Yes') def text_process(text_name): f = open(text_name, 'r', encoding='utf-8') text = f.read() l = text.split() l1 = [] for word in l: l1.append(word.strip('.,;:?!')) f.close() return l1 def position_dict(text): dictionary = {} for word in text: dictionary[word] == text.index[word] return(dictionary) def main(): text = text_process('text.txt') dictionary = position_dict(text) s = 'Слово {} находится на месте номер {} \n' text2 = open('text2.txt', 'w') for word in dictionary: text2.write(s.format(word, str(dictionary[word]))) text2.close() main() import re def xml_process(text_name): f = open(text_name, 'r', encoding='utf-8') text = f.read() l = text.split('\n') l1 = [] for tag in l: l1.append(tag) f.close() return l1 def 
ana_word(xml): anas = [] for tag in xml: if tag startswith.(''): anas.append(tag.count('') + ' ' for word in xml[(xml.index(tag))]: s += '\t' + word.strip('<.*>') + '\t' for word in xml[(xml.index(tag) + 1),(xml.index(tag) + 3)]: s += word.strip('<.*>') + ' ' s += '\n' return s def main(): xml = text_process(text.xml) print(ana_word(xml)) dictionary = dict_parts_of_speech(xml) d = open('dict.txt', 'w') for i in dictionary: d.write(i + '\t' + dictionary[i] + '\n') ins = open('ins.txt', 'w') for tag in xml: ins.write(instrumentalis(tag)) d.close() ins.close() main() import re def corpus_process(text_name): f = open(text_name, 'r') text = f.read() l = [] for line in text: l.append(line) f.close() return l def count_frequency(word, text): n = 0 for s in text: if word in s: n += 1 return n def count_adj(text): dictionary = {} for s in text: if re.search('type=l.f.*', text) != None: wordtype = s number = wordtype.find('type=') wordtype = wordtype[number, len(s)] wordtype.replace('type="', '') number2 = wordtype.find('"') wordtype = wordtype[0, number] wordtype = '"' + wordtype n = count_frequency(wordtype, text) dictionary[wordtype] = n return dictionary def write_in_file(corpus): dictionary = count_adj(corpus) adjectives = open('adjectives.txt', 'w') for i in dictionary: adjectives.write(i + ' ' + dictionary[i] + '\n') adjectives.close() def main(): corpus = corpus.process('corpus.xml') write_in_file(corpus) corpus1 = corpus n1 = corpus1.index('') n2 = corpus1.index('') for s in corpus1[n1 + 1, n2 - 1]: s = re.sub('', ', ', s) s = re.sub('', '', s) corpus1_file = open(corpus1.txt, 'w') for i in corpus1: corpus1_file.write(i + '\n') corpus1_file.close() main() import os def create_letters_list(): file_tree = os.walk('.') letters = {} for d in file_tree: folder_name = d[0].strip('.\/') letter = folder_name[0] if letter in letters: letters[letter] += 1 else: letters[letter] = 1 return letters def main(): letters = create_letters_list letter = '' n = 0 for i in letters: if 
letters[i] > n: letter = i n = letters[i] print(letter) main() import re import os import csv def open_file(xml): with open(xml, 'r', encoding = 'cp1251') as f: text = f.readlines() return text def open_file_as_string(xml): with open(xml, 'r', encoding = 'cp1251') as f: text = f.read() return text def count_words(text): text_as_string = open_file_as_string(text) return str(text_as_string.count('')) def find_author(text): text_as_string = open_file_as_string(text) author = re.search('') author = auth.lstrip('') return author def find_created(text): text_as_string = open_file_as_string(text) created = re.search('') created = auth.lstrip('') return created def main(): filetree = os.walk('news') task1 = open('task1.txt', 'w', encoding = 'cp1251') for root, dirs, files in filetree: for f in files: task1.write(f + '\t' + count_words(f) + '\n') task1.close() task2 = open('task2.csv', 'w', encoding = 'cp1251') writer = csv.writer(task2.csv, delimiter = '|', quotechar='|', quoting=csv.QUOTE_MINIMAL) for root, dirs, files in filetree: for f in files: f.writerow([f] + [find_author(f)] + [find_created(f)]) if __name__ == '__main__': main() import re import os import csv def open_file(xml): with open(xml, 'r', encoding = 'cp1251') as f: text = f.readlines() return text def open_file_as_string(xml): with open(xml, 'r', encoding = 'cp1251') as f: text = f.read() return text def count_words(text): text_as_string = open_file_as_string(text) return str(text_as_string.count('')) def find_author(text): text_as_string = open_file_as_string(text) author = re.search('') author = auth.lstrip('') return author def find_created(text): text_as_string = open_file_as_string(text) created = re.search('') created = auth.lstrip('') return created def main(): filetree = os.walk('news') task1 = open('task1.txt', 'w', encoding = 'cp1251') for root, dirs, files in filetree: for f in files: task1.write(f + '\t' + count_words(f) + '\n') task1.close() task2 = open('task2.csv', 'w', encoding = 'cp1251') 
writer = csv.writer(task2.csv, delimiter = '|', quotechar='|', quoting=csv.QUOTE_MINIMAL) for root, dirs, files in filetree: for f in files: f.writerow([f] + [find_author(f)] + [find_created(f)]) if __name__ == '__main__': main() import os filetree = os.walk('news') for root, dirs, files in filetree: for f in files: print(f) def new_sentence(sentence): sentence1 = '' for word in sentence: sentence1 += word.strip('.,;:?!') + ' ' sentence1 += '.' return sentence1 def text_process(text_name): f = open(text_name, 'r', encoding='utf-8') text = f.read() text = text.replace('!','.') text = text.replace('?', '.') text = text.replace('...','.') l = text.split(.) l1 = [new_sentence(sentence) for sentence in text] f.close() return l1 def create_dict(text): dictionary = {sentence: {word: len(word) for word in sentence} for sentence in text} def main(): text = text_process('text.txt') return(create_dict(text)) main() import random n = open('nouns.txt', 'r') nouns = [line.strip() for line in n] v = open('verbs.txt', 'r') verbs = [line.strip() for line in v] c = open('clitics.txt', 'r') clitics = [line.strip() for line in c] n2 = open('nouns2.txt', 'r') nouns2 = [line.strip() for line in n2] p = open('marks.txt', 'r') punctuation = [line.strip() for line in p] i = open('imperatives.txt', 'r') imperative = [line.strip() for line in i] def verse1: return (random.choice(nouns)+ ' ' + random.choice(verbs) + ' ' + random.choice(nouns) + ' ' + random.choice(punctuation)) def verse2: return(random.choice(imperative) + ' ' + random.choice(nouns) + ' ' + random.choice(clitics) + ' ' + random.choice(nouns2) + ' ' + random.choice(punctiation)) seq)) def verse3: return (random.choice(clitics) + ' ' + random.choice(nouns2) + ' ' + random.choice(verbs) + ' ' + random.choice(nouns) + ' ' + random.choice(punctuation)) def make_verse: verse = random.choice([1,2,3]) if verse == 1: return verse1() elif verse == 2: return verse2() else: return verse3() for n in range(4): print(make_verse)import os 
import os


def symbols(s):
    """Return True when every character of ``s`` is an ASCII Latin letter.

    An empty string yields True because no character breaks the rule.
    """
    latin = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    return all(ch in latin for ch in s)


def main():
    """Print each entry of the current directory whose name consists only of
    Latin letters, then print how many such entries were found."""
    n = 0
    for f in os.listdir('.'):
        # A truth test replaces ``symbols(f) = True``, which was an
        # assignment inside ``if`` and therefore a SyntaxError.
        if symbols(f):
            n += 1
            print(f)
    print(n)


main()


# --- Echo every entered line longer than five characters -------------------
# Reading stops at the first empty line.
s = input()
l = []
while s != '':
    if len(s) > 5:
        l.append(s)
    s = input()
for i in l:
    print(i)


# --- Print every prefix of one line, starting with the empty prefix --------
s = input()
for i in range(len(s) + 1):
    print(s[:i])


def text_process(text_name):
    """Read the UTF-8 file ``text_name`` and return its whitespace-separated
    tokens with leading/trailing ``.,;:?![]{}`` characters removed."""
    # ``with`` closes the file even when reading raises.
    with open(text_name, 'r', encoding='utf-8') as f:
        tokens = f.read().split()
    return [token.strip('.,;:?![]{}') for token in tokens]


def count_ness(text):
    """Return the tokens of ``text`` that end in "ness" (duplicates kept)."""
    # The suffix must be a string literal; the bare name ``ness`` was never
    # defined and raised NameError.
    return [word for word in text if word.endswith('ness')]


def frequency(word, text):
    """Return how many items of ``text`` are equal to ``word``."""
    return sum(1 for item in text if item == word)


def main():
    """Print the number of "-ness" tokens in ``text.txt`` and the highest
    frequency among them (0 when there are none)."""
    text = text_process('text.txt')
    ness_words = count_ness(text)
    words = {word: frequency(word, text) for word in ness_words}
    print(len(ness_words))
    # ``words.values()``: the loop variable ``word`` (a str) was used by
    # mistake. ``default`` avoids ValueError on a text without such tokens.
    print(max(words.values(), default=0))


main()


import re


def count_line():
    """Return 1 plus the number of lines of ``Test.xml`` that precede the
    first line equal to ``' \\n'`` (all lines when there is no such line)."""
    s = 1
    with open('Test.xml', 'r', encoding='utf-8') as f:
        for line in f:
            if line == ' \n':
                break
            s += 1
    return s


def write_in():
    """Write the value of ``count_line()`` to ``Test.txt``."""
    with open('Test.txt', 'w', encoding='utf-8') as f:
        f.write(str(count_line()))


write_in()


def open_text():
    """Return the whole content of ``Test.xml`` as one string."""
    with open('Test.xml', 'r', encoding='utf-8') as f:
        return f.read()


def phrase():
    """Return a dict mapping every regex match in ``Test.xml`` to its number
    of occurrences."""
    d = {}
    # NOTE(review): this pattern matches only empty strings; it looks as if
    # markup was lost from the literal -- confirm against the original file.
    for match in re.findall(r'.*?', open_text()):
        d[match] = d.get(match, 0) + 1
    return d


def write_phrase():
    """Write the ``phrase()`` counts to ``Test1.txt`` as ``key,count`` lines."""
    with open('Test1.txt', 'w', encoding='utf-8') as f:
        d = phrase()
        for key in d:
            f.write(key + ',' + str(d[key]) + '\n')


write_phrase()


def n():
    """Return the list of group-1 captures of the pattern over ``Test.xml``."""
    # NOTE(review): as in phrase(), the pattern appears to have lost its
    # surrounding markup -- confirm.
    return re.findall(r'(.*?)', open_text())


print(n())


# --- Print every rotation of a word -----------------------------------------
word = input('Введите слово')
for i in range(len(word)):
    print(word[i:] + word[:i])


# --- Remainder check and linear-equation check for three numbers ------------
print('Введите число')
a = float(input())
print('Введите число')
b = float(input())
print('Введите число')
c = float(input())
# A remainder modulo zero does not exist; testing ``b`` first avoids the
# ZeroDivisionError that ``a % b`` would raise.
if b != 0 and a % b == c:
    print('a даёт остаток c при делении на b')
else:
    print('a не даёт остаток c при делении на b')
if a * c + b == 0:
    print('c является решением линейного уравнения ax + b = 0')
else:
    print('c не является решением линейного уравнения ax + b = 0')


import os


def files():
    """Return the most frequent file-name suffix under the current directory.

    The suffix is whatever follows the last dot; a name without a dot counts
    as its own suffix. Ties go to the suffix seen first. Returns None when
    the tree contains no files.
    """
    dic = {}
    for root, dirs, names in os.walk('.'):
        for name in names:
            suffix = name[name.rfind('.') + 1:]
            dic[suffix] = dic.get(suffix, 0) + 1
    if not dic:
        return None
    # One pass instead of recomputing max(dic.values()) for every key;
    # max() keeps the first maximal key in insertion order.
    return max(dic, key=dic.get)


print(files())


import random


def _random_word(path):
    """Return a random whitespace-separated word from the UTF-8 file ``path``.

    Words from every line are candidates (the per-function loops this
    replaces kept only the last line). Raises IndexError on an empty file.
    """
    with open(path, 'r', encoding='utf-8') as file:
        words = file.read().split()
    return random.choice(words)


def noun_f():
    """Return a random feminine noun."""
    return _random_word('Существительные_ж.txt')


def noun_m():
    """Return a random masculine noun."""
    return _random_word('Существительные_м.txt')


def noun_number_of():
    """Return a random plural noun."""
    return _random_word('Существительные_множественные.txt')


def adjective_m(word):
    """Return a random masculine adjective followed by ``word``."""
    return _random_word('Прилагательные_м.txt') + ' ' + word


def adverb():
    """Return a random adverb."""
    return _random_word('Наречия.txt')


def verb_f(subj):
    """Return a random feminine verb followed by ``subj``."""
    return _random_word('Глаголы_ж.txt') + ' ' + subj


def verb_m(adv, n):
    """Return ``adv``, then ``n`` glued to a random masculine verb, then a
    trailing space (``n`` is the optional negation supplied by ``no()``)."""
    return adv + ' ' + n + _random_word('Глаголы_м.txt') + ' '


def verb_inf():
    """Return a random infinitive."""
    return _random_word('Глаголы_инф.txt')


def verb_transitive(obj):
    """Return a relative clause: a random transitive verb followed by
    ``obj``."""
    return ', который ' + _random_word('Глаголы_переход.txt') + ' ' + obj


def verb_imp():
    """Return a random imperative verb."""
    return _random_word('Глаголы_пов.txt')


def time():
    """Return a random time expression."""
    return _random_word('Время.txt')


def pronoun():
    """Return a random pronoun."""
    return _random_word('Местоимения.txt')


def no():
    """Return either the negation particle ``'не '`` or an empty string."""
    return random.choice(['не ', ''])


def random_sentence1():
    """Build an imperative sentence."""
    return 'Иди и ' + verb_imp() + ' мне ' + noun_m() + 'а' + '!'


def random_sentence2():
    """Build a declarative sentence with a relative clause."""
    return (adjective_m(noun_m()) + verb_transitive(noun_number_of()) + ','
            + verb_m(adverb(), no()) + verb_inf() + '.')


def random_sentence3():
    """Build a question."""
    return 'Где ' + time() + ' ' + verb_f(noun_f()) + '?'


def random_sentence4():
    """Build a conditional sentence."""
    return ('Если б ' + pronoun() + ' был ' + noun_m() + ', то '
            + verb_m(adverb(), no()) + ' бы ' + verb_inf() + '.')


def random_text():
    """Return one sentence built from a randomly chosen template."""
    # Pick the template first so only the chosen sentence is built; building
    # all four and discarding three cost about twenty file reads per call.
    builder = random.choice([random_sentence1, random_sentence2,
                             random_sentence3, random_sentence4])
    return builder()


print("---- FASCINATING MASTERPIECE STARTS HERE ----")
num_of_sents = 5
for i in range(num_of_sents):
    sentence = random_text()
    sentence = sentence.capitalize()
    print(sentence, end=' ')
print("\n---------AND ENDS HERE ---------")


# --- Print a word backwards, one letter per line, skipping some letters -----
print('Введите слово')
word = input()
for letter in word[::-1]:
    # One test is enough; the condition used to be checked twice in a row.
    # NOTE(review): the literal also contains a comma, so commas are skipped
    # as well -- confirm whether only the two letters were intended.
    if letter in 'з,я':
        continue
    print(letter)


import re
import os


def text_read():
    """Return the content of the last ``.xml`` file met while walking the
    current directory, or an empty string when there is none."""
    text = ''  # defined up front so a tree without .xml files cannot fail
    for root, dirs, files in os.walk('.'):
        for f in files:
            if f.endswith('.xml'):
                # Join with ``root``: a bare file name only resolves for
                # files that sit directly in the working directory.
                with open(os.path.join(root, f), 'r',
                          encoding='utf - 8') as source:
                    text = source.read()
    return text


def count():
    """Return the ratio of two regex match counts over the XML text."""
    text = text_read()
    reg1 = re.findall(r'.*', text)
    # NOTE(review): ``reg2`` is not defined anywhere in the visible source,
    # so this line raises NameError; its definition (and probably part of
    # the pattern above) seems to have been lost. Restore it from the
    # original file before relying on this function.
    num = len(reg1) / len(reg2)
    return num


print(count())


def part_of_speech():
    """Return a dict counting each upper-case tag that follows ``gr="`` in
    the XML text."""
    dic = {}
    for tag in re.findall(r'gr="([A-Z]*)', text_read()):
        dic[tag] = dic.get(tag, 0) + 1
    return dic
print(part_of_speech()) def write_in(): with open('Test1.txt', 'w', encoding = 'utf-8') as f: d = part_of_speech() template = '{}{:>10}' for key in sorted(d): f.write((template.format(key, d[key]))+ '\n') return def write(): with open('Test1.txt', 'w', encoding = 'utf-8') as f: d = part_of_speech() for key in sorted(d): f.write(key+'\t'+str(d[key])+ '\n') return write() import re def open_text(): with open('Programming.txt', 'r', encoding = 'utf - 8') as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip(',.?!-') return arr def prog(): arr = open_text() regex = r'\bпрограммир(ова(ть(ся)?|нн(ым|о(е|го|му?))|вш(ая|ую|и(е|й|ми?|х)|е(й|е|му?|го))(ся)?|в|л([иа]?(сь)?)|(ся)?)|у((я(сь)?|ем(о(е|го|й|му?)|ы(е|й|х|ми?)|ая|ую)|ю(щ(ая|ую|и(е|й|х|ми?)|е(го|й|му?))(ся)?))|ют(ся)?|е((шь|т|ем)(ся)?)|ю(сь)?|ете(сь)?))\b' arr1 = [] for i in range(len(arr)): m = re.search(regex,arr[i]) if m != None: if arr[i] in arr1: pass else: arr1.append(arr[i]) return ', '.join(map(str,arr1)) print(prog()) import re def open_s(): with open ('Высшая школа экономики — Википедия.html','r', encoding = 'utf - 8')as f: content = f.read() links = r'Преподаватели
' if re.search(card_reg, text): card = re.search(card_reg, text).group() t_reg = 'Преподаватели(?:.|\n)*?

(.+?)<' if re.search(t_reg, card): profs = re.search(t_reg, card).group(1) with open ('data about teachers.txt', 'a', encoding = 'utf-8') as f: f.write(profs) else: print('No data about the nuber of professors found!') with open ('data about teachers.txt', 'a', encoding = 'utf-8') as f: f.write('No data about the nuber of professors found!') else: print('No card found in this article!') with open ('data about teachers.txt', 'a', encoding = 'utf-8') as f: f.write('No card found in this article!') if __name__ == '__main__': main() import os import re def tagsaway(sentence): s = '' for word in sentence: word = re.sub(u'<.+?>', u'', word) s = s + word + ' ' return s def get_bigramms(text): bi = [] text = text.split('') for i, word in enumerate(text): if 'gr="A=' and 'gen' in word: if i+1 < len(text): w = text[i+1] if 'gr="S,' and 'gen' in w: result1 = re.search('(.+?)', word) result2 = re.search('(.+?)', w) bi.append([result1.group(1), result2.group(1), tagsaway(text)]) return bi def newfile(arr): f = open('bigramms.txt', 'w', encoding = 'utf8') s = '' for i in arr: s = i[0] + '\t' + i[1] + '\t' + i[2] + '\n' f.write(s) s = '' f.close def filework(): folder = 'news' for file in os.listdir(folder): with open(os.path.join(folder, file)) as text: text = text.read().split('') for se in text: newfile(get_bigramms(se)) def main (): filework() main() import os import re def get_author (text): for word in text: if 'name="author"' in word: result = re.search('content="(.+?)"', word) return result.group(1) def get_day (text): for word in text: if 'name="created"' in word: result = re.search('content="(.+?)"', word) return result.group(1) def file_inf(): ff = [] folder = 'news' for file in os.listdir(folder): with open(os.path.join(folder, file)) as text: text = text.read().split('<') ff.append([file, get_author(text), get_day(text)]) return ff def newfile(arr): f = open('files_info.csv', 'w', encoding = 'utf8') f.write('Название файла;Автор;Дата создания текста\n') s = '' for i 
in arr: s = i[0] + ';' + i[1] + ';' + i[2] + '\n' f.write(s) s = '' f.close def main (): ff = file_inf() newfile(ff) main() import os import re def files(): ff = {} folder = 'news' for file in os.listdir(folder): with open(os.path.join(folder, file)) as text: words = re.findall('', text.read()) ff[file] = len(words) return ff def newfile(dic): f = open('words_in_files.txt', 'w', encoding = 'utf8') s = '' for k in dic: s = k + '\t' + str(dic[k]) + '\n' f.write(s) s = '' f.close def main (): ff = files() newfile(ff) main() import os def findanddel (folder): for root, dirs, files in os.walk(folder, topdown = False): for f in files: os.remove(os.path.join(root, f)) for d in dirs: os.remove(os.path.join(root, d)) def main (): folder = input() findanddel (folder) main () import os def draw (): for root, dirs, files in os.walk ('.'): for d in dirs: print ('\t'*root.count('\\'), '--',d) for f in files: print ('\t'*root.count('\\'), f) def main (): draw() main () def pointsaway (file): file = file.split() for i, word in enumerate (file): file[i] = file[i].strip('.,?!()*&^%$ file[i] = file[i].lower() return file def words (file): slova = {} for word in file: if word in slova: slova[word] += 1 else: slova[word] = 1 return slova def creation (dic): f = open ('file.tsv', 'w', encoding = 'utf8') arr = [] for k in dic: arr.append(k) arr.sort() for i in arr: f.write(i + '\t' + str(dic[i]) + '\n') f.close() def main (): f = open ('file.txt', 'r', encoding = 'utf8') file = f.read() f.close() text = pointsaway (file) semua = words (text) creation (semua) main () def pointsaway (file): file = file.split() for i, word in enumerate (file): file[i] = file[i].strip('.,?!()*&^%$ file[i] = file[i].lower() return file def creation (text): f = open ('new.txt', 'w', encoding = 'utf8') dic = {text[x]: x for x in range(0, len(text))} arr = [k for k in dic] arr.sort() for i in arr: f.write('{}\t{}\n'.format(i, str(dic[i]))) f.close() def main (): f = open ('file.txt', 'r', encoding = 'utf8') file 
= f.read() f.close() text = pointsaway (file) creation (text) main () import re def get_word (word): result = re.search('.*?(\w+)', word) if result: return result.group(1) else: return None def find_ins (text): inst = {} for i, word in enumerate(text): if 'gr="S' in word: if 'ins' in word: inst[i]=word return inst def newfile (words, text): f = open ('ins.txt', 'w', encoding = 'utf8') s = '' for k in words: i = 0 j = 1 while i<3: if get_word(text[k-j]) != None: s = get_word(text[k-j])+ ' ' + s i += 1 j += 1 else: j += 1 s = s + '\t' + get_word(words[k]) + '\t' i = 0 j = 1 while i<3: if get_word(text[k+j]) != None: s = s + ' ' + get_word(text[k+j]) i += 1 j += 1 else: j +=1 f.write(s) f.close def main (): f = open ('/home/woods/Загрузки/text.xml', 'r', encoding = 'utf8') file = f.read() text = file.split('\n') f.close() ss = find_ins(text) newfile (ss, text) main () import re def find_and_count (file): pos = {} for word in file: word = word.split('<') for part in word: result = re.search('.*?gr="(\w+)', part) if result: print (result.group(1)) if result.group(1) not in pos: pos[result.group(1)] = 1 else: pos[result.group(1)] += 1 return pos def newfile (dic): f = open('pos.txt', 'w', encoding = 'utf8') s = '' for k in dic: s = s + k + '\t' + str(dic[k]) + '\n' f.write(s) f.close def main (): f = open ('/home/woods/Загрузки/text.xml', 'r', encoding = 'utf8') file = f.read() text = file.split('\n') f.close() pos = find_and_count(text) newfile (pos) main () import re def find_w (file): words = re.findall('', file) n = len(words) return n def find_ana (file): anas = re.findall('(.+?)', line) if a: if a.group(2) not in types: types[a.group(2)] = 0 return types def countthem (file, types): words = [] sum = 0 for key in types: words.append(key) for el in words: for line in file: if '"'+el+'"' in line: sum += 1 types[el] = sum sum = 0 return types def newfile (types): s = '' f = open('adj.txt', 'w', encoding = 'utf8') for key in types: s = s + key + '-' + str(types[key]) + 
'\n' f.write(s) f.close() def main(): text = filework() dic = findthem(text) dic = countthem (text, dic) newfile(dic) main() import random file=open ('file_6.6.txt', 'r') def noun(): nouns=[] for line in file: if ' n ' in line: line=line.split(' ') nouns.append(line[0]) file.seek(0, 0) return random.choice(nouns) def pronoun(): pronouns=[] for line in file: if ' pn ' in line: line=line.split(' ') pronouns.append(line[0]) file.seek(0, 0) return random.choice(pronouns) def verb(): verbs=[] for line in file: if ' v ' in line: line=line.split(' ') verbs.append(line[0]) file.seek(0, 0) return random.choice(verbs) def adjective (): adjectives=[] for line in file: if ' adj ' in line: line=line.split(' ') adjectives.append(line[0]) file.seek(0, 0) return random.choice(adjectives) def suborob (n, adj, pn): x=random.randint(0,1) if x==0: return pn else: y=random.randint(0,1) if y==0: return n+' '+adj else: return n+' '+pn def declarative (subj, v, obj): return subj.capitalize()+' '+v+' '+obj+'.' def question (subj, v): x=random.randint (0, 1) if x==0: return 'Apa'+' '+subj+' '+v+'?' else: return 'Siapa'+' '+v+'?' def negative (subj, v, obj): x=random.randint(0, 1) if x==0: return subj.capitalize()+' tidak '+v+' '+obj+'.' else: return subj.capitalize()+' bukan '+obj+'.' def imperative (v, obj): x=random.randint(0,1) if x==0: return v.capitalize()+' '+obj+'!' else: return 'Jangan '+v+' '+obj+'!' def conditional (subj1, v1, obj1, subj2, v2, obj2): return 'Kalau '+subj1+' '+v1+' '+obj1+', '+subj2+' '+v2+' '+obj2+'.' 
def sequence (): a=[1, 2, 3, 4, 5] b=[] for i in range (5): x=random.choice(a) while x in b: x=random.choice(a) b.append(x) return b def text(): seq=sequence() for i in range (5): if seq[i]==1: print(declarative(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()))) elif seq[i]==2: print (question(suborob(noun(), adjective(), pronoun()), verb())) elif seq[i]==3: print (negative(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()))) elif seq[i]==4: print (imperative(verb(), suborob(noun(), adjective(), pronoun()))) else: print (conditional(suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()), suborob(noun(), adjective(), pronoun()), verb(), suborob(noun(), adjective(), pronoun()))) text() file.close() import re def search (text): otr = re.search ('

\n\n\n', text) if otr: result = otr.group(3) return result def main (): f = open('file_10.6.html', 'r', encoding = 'utf8') file = f.read() f.close() ans = search (file) print (ans) main() def pointsaway (file): file = file.replace('?!', '.') file = file.split('.') for i, word in enumerate (file): file[i] = file[i].replace('.,?!()*&^%$ file[i] = file[i].replace('-- ', ' ') file[i] = file[i].lower() return file def tenplus (text): for sentence in text: sentence = sentence.split() n=0 s=0 for word in sentence: word = word.strip('.,?!()*&^%$ s+=len(word) n+=1 if n>10: print ("Это предложение со словами длины %s"%(str(round(s/n, 1)))) def main (): f = open ("file_12.6.txt", "r", encoding = "utf8") file = f.read() f.close() text = pointsaway (file) tenplus (text) main () import random def intothedic (file): dic={} for line in file: line = line.split(';') for j, word in enumerate(line): line[j] = word.strip('\n') dic[line[0]] = line[1] return dic def youchoose (dic): keys = [] for key in dic: keys.append(key) return random.choice(keys) def thegameison (noun, dic): for key in dic: if key == noun: hint = dic[key] n=key break print (hint, '...') for i in range (3): if input() == n: print ('Победа!') break else: if i == 0: print ('Ещё 2 попытки') continue elif i == 1: print ('Ещё 1 попытка') continue else: print ('GAME OVER') f = open('file_8.6.csv', 'r', encoding = 'utf8') file = f.readlines() f.close() words = intothedic(file) word = youchoose(words) thegameison(word, words) import re def pointsaway (file): file = file.split() for i, word in enumerate (file): file[i] = file[i].strip('.,?!()*&^%$ file[i] = file[i].lower() return file def findverbs (file): verbs = [] for word in file: if re.search ('загру(з(и.*|ят.*)|ж(у.*|ен.*))', word) != None: if word not in verbs: verbs.append(word) return verbs f = open ('file_9.6.txt', 'r', encoding = 'utf8') file = f.read() f.close() file = pointsaway(file) verbs = findverbs(file) print (verbs) def filework (): f = open('corpus.txt', 
'r', encoding = 'utf8') file = f.readlines() f.close() return file def newfile (text): f = open('lines.txt', 'w', encoding = 'utf8') f.write(str(len(text))) f.close def main(): text = filework() newfile(text) main() import os def names(array): names = [] for name in array: if os.path.isfile(name): names.append(name) return names def haspoints(array): points = ',!_-' s = 0 su = 0 for name in array: for c in name: if c in points: s += 1 if s > 0: su += 1 s = 0 print ("Знаки препинания есть в названии такого количества файлов: ", su) def main(): files = names (os.listdir('.')) print (os.listdir('.')) haspoints (files) main() a=input ('Введите число ') for i in range (1, 11): print (i, '*', a, '=', i*int(a)) xs=[] for i in range (7): xs.append(int(input())) for i in range (7): if xs[i]>0: for j in range (xs[i]): print ('x', end='') print ('\n') else: print ('\n') word=input() l=len(word) while l>0: word=list(word) x=word.pop(0) print (''.join(word)) l=len(word) def pointsaway (file): file = file.split() for i, word in enumerate (file): file[i] = file[i].strip('.,?!()*&^%$ file[i] = file[i].lower() return file def findomni (file): omni = [] for word in file: if word.startswith('omni'): w = word.replace('omni', '') omni.append([word, w]) return omni def findwords (array, file): n = 0 m = 0 for i, k in enumerate (array): for word in file: if word == array[i][0]: n += 1 if word == array[i][1]: m += 1 print (array[i][0], n, '-', array[i][1], m) n = 0 m = 0 def main (): name = input('Введите имя файла ') f = open (name, "r") file = f.read() f.close() file = pointsaway (file) findwords(findomni(file), file) main() import re def filework(): f = open('corpus.txt', 'r', encoding = 'utf8') file = f.readlines() f.close() return file def findthem (file): types = {} for line in file: a = re.search('(.+?)', line) if a: if a.group(2) not in types: types[a.group(2)] = 0 return types def countthem (file, types): words = [] sum = 0 for key in types: words.append(key) for el in words: for 
line in file: if '"'+el+'"' in line: sum += 1 types[el] = sum sum = 0 return types def newfile (types): s = '' f = open('types.txt', 'w', encoding = 'utf8') for key in types: s = s + key + '\n' f.write(s) f.close() def main(): text = filework() dic = findthem(text) dic = countthem (text, dic) newfile(dic) main() capital='АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ' s=0 cap=0 f=open('file_5.6.txt', 'r', encoding='utf8') file=f.readlines() l=len(file) for i in range (0, l): file[i]=file[i].split(' ') for i in range (0, l): m=len(file[i]) for j in range (0, m): if file[i][j][0] in capital: cap+=1 s+=1 print ((cap/s)*100) f.close() import xml.etree.ElementTree as a from os import walk def sent(filename): tree = a.parse('./news/'+file) root = tree.getroot() tmp = root.findall('.//se') return(len(tmp)) def move(res,filename): res_file = open(filename, 'w') for item in res: res_file.write(item+'\n') f = [] words = [] p = './news'; for (dirpath, dirnames, filenames) in walk(p): f.extend(filenames) break for file in f: words.append(file+'\t'+str(sent(file))) move(words,'counted.txt') n=[] while True: word = input ('word:') if word ==(''):break elif word.endswith ('tur'): n.append(word) print ('/n'. 
join(n)) d={'Germany':'Berlin','France':'Paris', 'USA':'Washington DC', 'Russia':'Moscow'} for key in d: print (key+"*"+d[key]) def capital(a): a=input("Country:") if a in d: return (d[a]) else: print ("oops") def revert (): k={} countries=d.keys() for key in countries: k[d[key]]=key return (k) a=revert() print(a) with open ('fr.txt', 'r', encoding = 'utf-8') as a: text=a.readlines() for line in text: if 'союз' in line: print (line) n = input("WORD: ") m = len(n)//2 print (n[:m], n[:m-1:(-1)]) import os import shutil name=input ('напишите што-нибудь') f_name=name.replace(' ', '\\') os.makedirs(f_name) n = input ('word:') for i in range (len(n)): print(n [-i-1::]) import re def open_html('xenokeryx.html'): with open ('xenokeryx.html', 'r', encoding='utf-8') as f: content=f.read() return content def find_links (content): reg=r'(.*?)' links=re.findall (reg, content) return links text=open_html ('xenokeryx.html') links=find_links(text) for link in links [:20]: print (link[1], '-->', link[0]) a=open (input(), 'r', encoding='utf-8') b=0 c=0 for line in a: arr=line.split() b=b+len(arr) for d in arr: if len(d)>10: c=c+1 a.close() e=c/b*100 print (e, '%') import os def sup(): for root,dirs,files in os.walk('.'): num=root.count('\\') root+ root.split('\\')[-1] print ('\t'*(num), root, sep='--') for f in files: print ('\t'*(num+1), f) sup() def opentext (file.txt): forms = [] with open (file.txt, 'r', encoding='utf-8') as a: text=a.read() forms=text.split() for i in range(len(forms)): forms[i]=forms[i].strip(.,?!:;()) return forms def word (): a=opentext(file.txt) b=[] for i in range (len(a)): if a[i][-1]=='s': if a[i][-2]=='u': if a[i][-3]=='o': b.append(a[i]) print (b) c=b.split() d=str.count(c) return d import random def noun (): file=open('Mnoun.txt', 'r', encoding='utf-8') f=readlines() nouns=[] for line in f: nouns.append(line.split(" ")) return random.choise(nouns) def verb (): file=open ('verb1.txt', 'r', encoding='utf-8') f=readlines() verbs=[] for line in f: 
verbs.append(line.split(" ")) return random.choise(verbs) def adj (): file=open ('adj.txt', 'r', encoding='utf-8') f=readlines() adjectives=[] for line in f: adjectives.append(line.split(" ")) return random.choise(adjectives) def noun2 (): file=open ('noun2.txt','r', encoding='utf-8') f=readlines() plnouns=[] for line in f: plnouns.append(line.split(" ")) return random.choise(plnouns) def conj(): conjs=["и", "или", "но", "да", "однако", "зато", "когда", "пока", "потому что", "чтобы", "то есть"] return "," + random.choise(conjs) def noun3 (): file=open ('noun3.txt', 'r', encoding='utf-8') f=readlines () fnouns=[] for line in f: fnouns.append (line.split(" ")) return random.choise(fnouns) def 2verb (): file=open ('2verb.txt', 'r', encoding ='utf-8') f=readlines () 2verbs=[] for line in f: 2verbs.append (line.split(" ")) return random.choise (2verbs) def sen (): return (noun+" "+verb+" "+adj+" "+noun2+" "+conj+" "+noun3+" "+2verb+"." for i in range(5): print (sen()) import xml.etree.ElementTree as a from os import walk def sent(filename): tree = a.parse('./news/'+file) root = tree.getroot() tmp = root.findall('.//se') return(len(tmp)) def move(res,filename): res_file = open(filename, 'w') for item in res: res_file.write(item+'\n') f = [] words = [] p = './news'; for (dirpath, dirnames, filenames) in walk(p): f.extend(filenames) break for file in f: words.append(file+'\t'+str(sent(file))) move(words,'counted.txt') import xml.etree.ElementTree as a from os import walk import pandas as q def second(filename): tree = a.parse('./news/'+file) root = tree.getroot() name = root.find(".//*[@name='author']") topic = root.find(".//*[@name='topic']") return(name.attrib['content']+":"+topic.attrib['content']) f = [] d = [] p = './news'; for (dirpath, dirnames, filenames) in walk(p): f.extend(filenames) for file in f: tmp = second(file).split(':') tmp_arr = [file,tmp[0],tmp[1]] d.append(tmp_arr) df = q.DataFrame(d,columns=["название","автор","тема"]) df.to_csv("2.csv", sep=';', 
encoding='windows-1251') import re def main (): with open('F.xml', 'r', encoding='utf-8') as f: text=f.readlines() return text n=main () m=len(n) print (m) import re dic = {} with open('f.xml') as f: for row in f: if(re.match(r'.*',row)): arr = row.split("\"") key = arr[3] if key in dic: dic[key]=dic[key]+1 else: dic[key] = 1; for key in dic.items(): print(key+" "+"\r\n") import os import shutil folder='.' print (os.listdir('.')) for f in os.listdir('.'): with open (os.path.join(folder, f)) as text: print('file: ', f) a=str_word_count(f, ' ') filelist = [f for f in os.listdir('.') if os.path.isfile(f)] if a>1: print(filelist) def open_format(crab): a = [] with open (crab.txt, 'r', encoding = 'utf-8') as f: text = f.read() text = re.sub('\.\.\.|[\.\?]', '!', text) a = text.split('!')[:-1] for i in range(len(a)): a[i] = re.sub('[<>\*\.«»,\'\"]','', a[i]) a[i] = a[i].strip() return a def repeat(): work=open_format (crab.txt) words=re.findall(r'([a-zA-Z]+(?:[?:[\'-][a-zA-Z]+)*)',s) res=[] for x in range (a,z): res.append (x) print (res) import os os.listdir('.') file_tree=os.walk('.') names = {} for root, dirs, files in os.walk('.'): for f in files: name = f.split('.')[0] if name not in names: names[name]=1 print(len(names)) import random def noun (): file=open('Mnoun.txt', 'r', encoding='utf-8') f=readlines() nouns=[] for line in f: nouns.append(line.split(" ")) return random.choise(nouns) def verb (): file=open ('verb1.txt', 'r', encoding='utf-8') f=readlines() verbs=[] for line in f: verbs.append(line.split(" ")) return random.choise(verbs) def adj (): file=open ('adj.txt', 'r', encoding='utf-8') f=readlines() adjectives=[] for line in f: adjectives.append(line.split(" ")) return random.choise(adjectives) def noun2 (): file=open ('noun2.txt','r', encoding='utf-8') f=readlines() plnouns=[] for line in f: plnouns.append(line.split(" ")) return random.choise(plnouns) def conj(): conjs=["и", "или", "но", "да", "однако", "зато", "когда", "пока", "потому что", "чтобы", 
"то есть"] return "," + random.choise(conjs) def noun3 (): file=open ('noun3.txt', 'r', encoding='utf-8') f=readlines () fnouns=[] for line in f: fnouns.append (line.split(" ")) return random.choise(fnouns) def 2verb (): file=open ('2verb.txt', 'r', encoding ='utf-8') f=readlines () 2verbs=[] for line in f: 2verbs.append (line.split(" ")) return random.choise (2verbs) def sen (): return (noun+" "+verb+" "+adj+" "+noun2+" "+conj+" "+noun3+" "+2verb+"." for i in range(5): print (sen()) import re def main(): with open ('lemon.html', 'r', encoding='utf-8') as f: text=f.read() a='+?
Отряд:(.+?)
' if re.search (a, text): card = re.search(a, text).group() b='Семейство(?:.|\n)*?

(.+?)' if re.search(b, a): с = re.search(b, a).group(1) with open ('family.txt', 'a', encoding = 'utf-8') as f: f.write(с) else: print('Family type not found.') with open ('family.txt', 'a', encoding = 'utf-8') as f: f.write('Family type not found.') else: print('Error!') with open ('family.txt', 'a', encoding = 'utf-8') as f: f.write('Error!') import os import re def count_words(): with open('answer1.txt', 'w', encoding='utf-8') as fout: for root, dirs, files in os.walk('./news'): for f in files: count = 0 with open(os.path.join(root, f), 'r') as fin: f1 = fin.read().split() for line in f1: if '' in line: count += 1 fout.write('%s \t %d \n' %(f, count)) def annot(): with open('answer2.csv', 'w', encoding='utf-8') as fout: fout.write('Название файла \t Автор \t Дата создания') for root, dirs, files in os.walk('./news'): for f in files: with open(os.path.join(root, f), 'r') as fin: f2 = fin.read() nam = f reg1 = '' reg2 = '' auth = re.search(reg1, f2).group(1) date = re.search(reg2, f2).group(1) fout.write('%s \t %s \t %s \n' %(f, auth, date)) def bigramms(): with open('answer3.txt', 'w', encoding='utf-8') as fout: for root, dirs, files in os.walk('./news'): for f in files: with open(os.path.join(root, f), 'r') as fin: f3 = fin.read().split('\n') reg = '(.+?)' for indx, sentence in enumerate(f3): if '' in sentence: f3[indx] = [re.search(reg, sentence).group(1), re.search(reg, sentence).group(2)] else: f3.remove(sentence) temp = True for indx, word in enumerate(f3): try: if 'A' in word[0]: if 'gen' in word[0]: if 'S' in f3[indx + 1][0]: if 'gen' in f3[indx + 1][0]: fout.write('%s %s \n' %(word[1], f3[indx + 1][1])) except IndexError: temp = False def main(): count_words() annot() bigramms() if __name__ == '__main__': main() def done_text(): f = open('ostin.txt', 'r', encoding='utf-8') s = f.read().lower().split() f.close() for indx, word in enumerate(s): s[indx] = word.strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><') return s def count_words(arr): d = {} for word in arr: 
if word in d: d[word] += 1 else: d[word] = 1 return d def count_letters(arr): dic = {} alpha = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя' for letter in alpha: dic[letter] = 0 for word in arr: if word and word[0] in dic: dic[word[0]] += 1 return dic def count_pos(arr): dic = {key:ind for ind, key in enumerate(arr)} return dic def create_antw(dic): f = open('answer_keys2.tsv', 'w', encoding='UTF-8') for key in sorted(dic): f.write('{0}\t{1}\n'.format(key, str(dic[key]))) f.close() def main(): textik = done_text() create_antw(count_pos(textik)) if __name__ == '__main__': main() my_num = 9 your_num = int(input('Write a number from 1 to 10, please: ')) if your_num == my_num: print('You\'re lucky one :D') else: if your_num > my_num: print('Your number is too big') else: print('Your number is too small') your_num = int(input('Try again: ')) if your_num == my_num: print('You\'re lucky one :D') else: print('You\'re hopeless') import re def split_txt(): f = open('test1.txt', 'r', encoding='UTF-8') s = f.read() s.replace('\n', ' ') s1 = re.sub('(\?|!|\.\.\.|([а-яa-z.]+ [а-яa-zА-ЯA-Z]{2,}\.))', '\\1^', s) print(s1) def main(): split_txt() main() import re def find_space(fname): f = open(fname, 'r', encoding='UTF-8') s = f.read().split() regex = '«[a-zA-ZА-Яа-я]+?-[0-9]' wlist = re.findall(regex, s) print(', '.join(wlist)) def main(): find_space('test.txt') main() coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя' while True: s = input('Введите текст: ') if s == '': break s = s.split() res = '' if s[0] == 'decode': s.pop(0) s = ' '.join(s) for letter in s: if letter == '!': res += ' ' else: for indx, i in enumerate(coinc): if i == letter: if i == 'A': res += 'Z' elif i == 'a': res += 'z' elif i == 'А': res += 'Я' elif i == 'а': res += 'я' else: res += coinc[indx - 1] else: if s[0] == 'code': s.pop(0) s = ' '.join(s) for letter in s: if letter == ' ': res += '!' 
else: for indx, i in enumerate(coinc): if i == letter: if i == 'Z': res += 'A' elif i == 'z': res += 'a' elif i == 'Я': res += 'А' elif i == 'я': res += 'а' else: res += coinc[indx + 1] print (res) print('Программа завершила свою работу!') words=[] check = True while check is True: inp = input("Введите слово: ") if inp == "": check = False else: words.append(inp) for indx in range(len(words) - 1, -1, -1): print(words[indx]) check = True while check is True: s = input("Введите текст: ") if s == "": check = False else: res = "" for letter in s: if ord(letter) == 90: res += chr(65) elif ord(letter) == 122: res += chr(97) elif ord(letter) == 1071: res += chr(1040) elif ord(letter) == 1103: res += chr(1072) else: res += chr(ord(letter) + 1) print (res) print("Программа завершила работу") names = ['Оля','Маша','Коля','Костя','Нина','Ира'] surnames=['Кузнецова', 'Сидорова', 'Семенов', 'Иванов', 'Илларионова'] if len(names) >= len(surnames): for i in range(len(surnames)): strng = names[i] + ' ' + surnames[i] print(strng) check = len(surnames) while check < len(names): print(names[check]) check += 1 else: for i in range(len(names)): strng = names[i] + ' ' + surnames[i] print(strng) check = len(names) while check < len(surnames): print(surnames[check]) check += 1 def doneText(fname): f = open(fname, 'r') s = f.read().split(' ') for indx, word in enumerate(s): s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"') f.close return s fnm = input('Введите имя файла: ') arr = doneText(fnm) print('Количество слов в тексте = {0}'.format(arr.len())) my_num = 9 check = False while (check == False): try: your_num = int(input('Write a number from 1 to 10, please: ')) except ValueError: print("It's not a number, I'm out") break if your_num == my_num: print('You\'re lucky one :D') check = True else: if your_num > my_num: print('Your number is too big') else: print('Your number is too small') print("End of programme") check = True while check == True: word = input('Write a word in cyrillic: 
') if word == "": check = False print("Empty word, I'm out") else: if word.endswith('о') or word.endswith('н') or word.endswith('р'): print('Possible forms: Nom. Sg. / Acc. Sg.') elif word.endswith('а'): print('Possible forms: Gen. Sg. / Nom. Pl. / Acc. Pl.') elif word.endswith('у'): print('Possible forms: Dat. Sg.') elif word.endswith('ом'): print('Possible forms: Instrum. Sg. / Nom. Sg.') elif word.endswith('е'): print('Possible forms: Prep. Sg.') elif word.endswith('ам'): print('Possible forms: Dat. Pl.') elif word.endswith('ами'): print('Possible forms: Instrum. Pl.') elif word.endswith('ах'): print('Possible forms: Prep. Pl.') elif word.endswith('и'): print('Possible forms: Nom. Pl.') else: print('Possible forms: Gen. Pl.') print('Thanks for using!') import os import re def rem_dir(name_dir): for root, dirs, files in os.walk('.', topdown=False): if re.findall(os.sep + name_dir, root): for f in files: os.remove(os.path.join(root, f)) for d in dirs: os.rmdir(os.path.join(root, d)) for d in dirs: if name_dir == d: os.rmdir(os.path.join(root, d)) def print_root(): s = '--' for root, dirs, files in os.walk('.'): print (s + root) if len(dirs): s = '\t' + s for f in files: print('\t{0}'.format(f)) def main(): rem_dir('wrong') print_root() if __name__ == '__main__': main() word = input('Write a word in cyrillic: ') if word.endswith('о'): print('Possible forms: Nom. Sg. / Acc. Sg.') elif word.endswith('а'): print('Possible forms: Gen. Sg. / Nom. Pl. / Acc. Pl.') elif word.endswith('у'): print('Possible forms: Dat. Sg.') elif word.endswith('ом'): print('Possible forms: Instrum. Sg.') elif word.endswith('е'): print('Possible forms: Prep. Sg.') elif word.endswith('ам'): print('Possible forms: Dat. Pl.') elif word.endswith('ами'): print('Possible forms: Instrum. Pl.') elif word.endswith('ах'): print('Possible forms: Prep. Pl.') else: print('Possible forms: Gen. 
Pl.') import re def done_text(fname): f = open(fname, 'r', encoding='UTF-8') s = f.read().split() for indx, word in enumerate(s): s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»”“><') f.close return s def print_words(s): regex = '.*[ауоиыеюя].*[ауоиыеюя].*[ауоиыеюя].*' for word in s: if re.search(regex, word): print(word) def main(): textik = done_text(input('Введите имя файла с расширением: ')) print_words(textik) main() import os import re import math from math import log PUNCT = '[.,!«»?&@"$\[\]\(\):;% def preprocessing(text): text_wo_punct = re.sub(PUNCT, '', text.lower()) words = text_wo_punct.strip().split() return words def count_tf(word, text): return text.count(word) / len(text) def count_df(word, texts): n = [1 for text in texts if word in text] return sum(n) def count_idf(word, texts): n = len(texts) / (1 + count_df(word, texts)) return n def count_tfidf(word, text, texts): tf = count_tf(word, text) idf = count_idf(word, texts) return log(tf, 10) * log(idf, 10) def get_texts(): texts_dic = {} for root, dirs, files in os.walk('wikipedia'): for f in files[:50]: with open(os.path.join(root, f), 'r', encoding='utf-8') as t: text = preprocessing(t.read()) texts_dic[f.split('.')[0]] = text texts = list(texts_dic.values()) return texts_dic, texts def fin_output(texts_dic, texts): for text in texts_dic: print("Top words in document {}".format(text)) scores = {} for word in texts_dic[text]: scores[word] = count_tfidf(word, texts_dic[text], texts) sorted_words = sorted(scores.items(), key=lambda x: x[1]) for word, score in sorted_words[:5]: print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5))) def main(): a = get_texts() fin_output(a[0], a[1]) if __name__ == '__main__': main() check = True coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя' while check is True: s = input('Введите текст: ') if s == '': check = False else: res = '' for letter in s: if letter == ' ': 
res += ' ' else: for indx, i in enumerate(coinc): if i == letter: if i == 'A': res += 'Z' elif i == 'a': res += 'z' elif i == 'А': res += 'Я' elif i == 'а': res += 'я' else: res += coinc[indx - 1] print (res) print('Программа завершила свою работу!') def done_text(fname): f = open(fname, 'r') s = f.read().split() for indx, word in enumerate(s): s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»”“><') f.close return s def freq_dic(arr): dic = {} for word in arr: if word not in dic: dic[word] = 1 else: dic[word] += 1 return dic def print_dic(dic): for word in dic: if dic[word] >= 10: print(word, dic[word]) def main(): my_text = done_text(input('Введите имя файла с расшриением: ')) print_dic(freq_dic(my_text)) main() def done_text(fname): f = open(fname, 'r') s = f.read().split() for indx, word in enumerate(s): s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><') f.close return s def count_syll(arr, n): res = [] voc = 'аоуыиеёюя' for word in arr: num = 0 for letter in word: if letter in voc: num += 1 if num == n: res.append(word) return res def first_letter(arr, letter): res = [] for word in arr: if word.startswith(letter): res.append(word) return res def choice(): fnm = input('Введите имя файла: ') textik = done_text(fnm) make_choice = input('Если хотите, чтобы программа считала слоги, введите syllables; иначе - letter: ') if make_choice == 'syllables': numb = int(input('Введите количество слогов в словах: ')) print(' '.join(count_syll(textik, numb))) else: lett = input('Введите желаемую первую букву: ') print(' '.join(first_letter(textik, lett))) def main(): choice() main() import os def mk_ppk(s): s = s.split() pth = '.' for word in s: pth += os.sep + word if not os.path.exists(pth): os.makedirs(pth) def mk_fls(num): pth = '.' for ppk in range(num): pth += os.sep + str(ppk+1) if not os.path.exists(pth): os.makedirs(pth) for pp_quant in range(ppk+1): f = open(pth + os.sep + str(pp_quant + 1) + '.txt', 'w') f.close() pth = '.' 
def prnt_dir(nm_dir):
    """Print the names of the sub-directories of *nm_dir*."""
    for fl in os.listdir(nm_dir):
        # listdir returns bare names; join them with nm_dir so the check
        # also works when nm_dir is not the current directory.
        if os.path.isdir(os.path.join(nm_dir, fl)):
            print(fl)


def main():
    """Create the directories requested by the user and list '.'."""
    mk_ppk(input('Введите приложение: '))
    mk_fls(int(input('Введите число: ')))
    prnt_dir('.')


if __name__ == '__main__':
    main()


# --- Script: guess the hidden number; non-numeric retry ends the game ---
my_num = 9
check = False
your_num = int(input('Write a number from 1 to 10, please: '))
while(your_num != my_num):
    if your_num > my_num:
        print('Your number is too big')
    else:
        print('Your number is too small')
    try:
        your_num = int(input('Try again: '))
    except ValueError:
        print("Not a number")
        check = True
        break
if check == True:
    print("See you next time")
else:
    print("You're right")


# --- Script: replace each letter by the next one in ``coinc`` ---
# Z, z, Я and я wrap to the first letter; characters other than a space that
# are not in ``coinc`` are dropped from the output.
check = True
coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
while check is True:
    s = input('Введите текст: ')
    if s == '':
        check = False
    else:
        res = ''
        for letter in s:
            if letter == ' ':
                res += ' '
            else:
                for indx, i in enumerate(coinc):
                    if i == letter:
                        if i == 'Z':
                            res += 'A'
                        elif i == 'z':
                            res += 'a'
                        elif i == 'Я':
                            res += 'А'
                        elif i == 'я':
                            res += 'а'
                        else:
                            res += coinc[indx + 1]
        print(res)
print('Программа завершила свою работу!')


# --- Script: print every entry of freq_crlf.txt whose 2nd field is 'союз' ---
with open('freq_crlf.txt', 'r', encoding='utf-8') as f:
    s = f.read().split('\n')
for line in s:
    line = line.split(' | ')
    # Lines without a second field (e.g. the empty string after the final
    # newline) used to raise IndexError here.
    if len(line) > 1 and line[1] == 'союз':
        print(' | '.join(line))


# --- Script: collect words first, then look all of them up ---
with open('freq_crlf.txt', 'r', encoding='utf-8') as f:
    s = f.read().split('\n')
arr = []
while True:
    word = input('Введите слово: ')
    if word == '':
        print('Результаты:')
        break
    else:
        arr.append(word)
for word in arr:
    check = False
    for line in s:
        line = line.split(' | ')
        if word == line[0]:
            print(' | '.join(line))
            check = True
    if check is False:
        print(u'{0}: Такого слова в словаре нет.'.format(word))
print('Завершение работы программы')


# --- Script: look words up one at a time ---
with open('freq_crlf.txt', 'r', encoding='utf-8') as f:
    s = f.read().split('\n')
while True:
    word = input('Введите слово: ')
    if word == '':
        print('Завершение работы программы')
        break
    else:
        check = False
        for line in s:
            line = line.split(' | ')
            if word == line[0]:
                print(' | '.join(line))
                check = True
        if check is False:
            print('Такого слова в словаре нет.')


# --- Script: list entries tagged 'ед жен' and sum their third field ---
import decimal

with open('freq_crlf.txt', 'r', encoding='utf-8') as f:
    s = f.read().split('\n')
ress = ''
ipm_sum = 0
for line in s:
    line = line.split(' | ')
    # Skip lines that lack the tag or value field instead of raising
    # IndexError (the file's last line is empty after split('\n')).
    if len(line) < 3:
        continue
    if line[1].find('ед жен') != -1:
        ress += line[0]
        ress += ', '
        ipm_sum += decimal.Decimal(line[2])
print(ress)
print(u'Суммарное значение ipm = {0}'.format(ipm_sum))


# --- Script: statistics over islandic.xml ---
# NOTE(review): several patterns below are empty or refer to groups that the
# pattern does not define; they look as if markup was stripped from the
# source. They are reproduced unchanged - confirm against the original.
import re


def open_file():
    """Return the full text of islandic.xml."""
    with open('islandic.xml', 'r', encoding='UTF-8') as f:
        return f.read()


def count_lines():
    """Write the number of lines of the input to answer_length.txt."""
    s = open_file()
    s = s.split('\n')
    with open('answer_length.txt', 'w', encoding='UTF-8') as f:
        f.write(str(len(s)))


def my_diction(arr):
    """Return a dict mapping each item of *arr* to its number of occurrences."""
    dix = {}
    for word in arr:
        if word in dix:
            dix[word] += 1
        else:
            dix[word] = 1
    return dix


def create_diction():
    """Write the sorted distinct regex matches to answer_keys.txt."""
    s = open_file()
    regex = ''
    arr = re.findall(regex, s)
    dix = my_diction(arr)
    with open('answer_keys.txt', 'w', encoding='UTF-8') as f:
        f.write('Отсортированный список морфологических разборов:\n')
        for key in sorted(dix):
            f.write(key + '\n')


def count_adj():
    """Write each distinct regex match and its count to answer_adj.txt."""
    s = open_file()
    regex = ''
    arr = re.findall(regex, s)
    dix = my_diction(arr)
    with open('answer_adj.txt', 'w', encoding='UTF-8') as f:
        for key in sorted(dix):
            f.write(key + ' ' + str(dix[key]) + '\n')


def create_csv():
    """Rewrite the input with the substitutions below into answer_dict.csv."""
    s = open_file()
    print(s)
    regex1 = '(.+?)'
    regex2 = '<.+?>\n'
    s = re.sub(regex1, '\\1, \\2, \\3', s)
    s = re.sub(regex2, '', s)
    s = re.sub('( )+?', '', s)
    s = s.split('\n')
    with open('answer_dict.csv', 'w', encoding='UTF-8') as f:
        for line in s:
            f.write(line + '\n')


def main():
    """Produce all four answer files."""
    count_lines()
    create_diction()
    count_adj()
    create_csv()


if __name__ == '__main__':
    main()


# --- Script: drop letters at indexes 2, 4, ... and print each word reversed ---
check = True
words = []
while check is True:
    s = input("Введите слово: ")
    if s == "":
        check = False
    else:
        temp = []
        for letter in s:
            temp.append(letter)
        words.append(temp)
for wrd in words:
    for letterindx in range(2, len(wrd), 2):
        # The list shrinks while it is being popped from.
        if letterindx >= len(wrd):
            break
        wrd.pop(letterindx)
    s = ""
    for letterindx in range(len(wrd) - 1, -1, -1):
        s += wrd[letterindx]
    print(s)


# --- Script: average number of words per line of input.txt ---
with open('input.txt', 'r', encoding='UTF-8') as f:
    s = f.read().split('\n')
avgsum = 0
for indx, line in enumerate(s):
    s[indx] = line.split()
    avgsum += len(s[indx])
print(u'Среднее количество слов в строке = {0}'.format(avgsum / len(s)))


# --- Script: print odd-indexed letters of a Cyrillic word, except а and к ---
import re

word = input("Введите слово на кириллице: ")
pattern1 = r'[А-Яа-я]'
pattern2 = r'[1-9A-Za-z]'
if re.match(pattern1, word) and re.search(pattern2, word) is None:
    for indx, letter in enumerate(word):
        if indx % 2 != 0:
            if letter != "а" and letter != "к":
                print(letter)
else:
    print("Вводить можно только кириллицу :Р")


# --- Script: read three numbers and test two identities ---
temp = False
while (temp == False):
    try:
        a = float(input('Введите первое число (a) '))
        b = float(input('Введите второе число (b) '))
        c = float(input('Введите третье число (c) '))
        temp = True
    except (TypeError, ValueError):
        print('Просила же только числа вводить!')
if a + b == c:
    print('Поздравляю! a + b = c')
else:
    print('Прошу прощения, но a + b != c')
if a*c + b == 0:
    print('Поздравляю! a*c + b = 0')
else:
    print('Прошу прощения, но a*c + b != 0')


# --- Script: word guessing game driven by a "word,hint,hint,..." file ---
import random


def ask_name():
    """Ask the user for a file name."""
    return input('Введите имя файла с расширением: ')


def get_words():
    """Return a dict mapping each word of the chosen file to its hints."""
    with open(ask_name(), 'r', encoding='UTF-8') as f:
        s = f.read().split('\n')
    dic = {}
    for ln in s:
        temp = ln.split(',')
        # A line without hints (e.g. the empty last line) would later make
        # random.choice fail on an empty list, so it is skipped.
        if len(temp) < 2:
            continue
        dic[temp[0]] = temp[1:]
    return dic


def guess_word(word):
    """Give the user len(word) attempts to type *word*."""
    num = len(word)
    if num <= 4:
        print('У вас {0} попытки'.format(num))
    else:
        print('У вас {0} попыток'.format(num))
    while num > 0:
        temp = input('Введите слово: ')
        if temp == word:
            print('Вы угадали!')
            break
        else:
            print('Попробуйте еще раз!')
            num -= 1
    if num == 0:
        print('Повезет в другой раз!')


def game(d):
    """Play one round per word of *d* until the user declines to continue."""
    num_check = 0
    for k in d:
        print('Подсказка! {0} ...'.format(random.choice(d[k])))
        guess_word(k)
        num_check += 1
        if num_check == len(d):
            print('Это было последнее слово. Приходите еще')
            break
        ask = input('Хотите попробовать еще раз? Введите только "да" или "нет": ')
        if ask == 'нет':
            break


def main():
    """Load the word list and start the game."""
    d = get_words()
    game(d)


main()


# --- Script: count -ing forms and occurrences of a chosen form ---
def done_text(fname):
    """Return the whitespace-separated tokens of *fname*, lowercased and
    stripped of surrounding punctuation and digits."""
    # ``with`` closes the file; the original ``f.close`` was never called.
    with open(fname, 'r') as f:
        s = f.read().split()
    for indx, word in enumerate(s):
        s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><')
    return s


def get_fname():
    """Ask the user for a file name."""
    return input("Введите имя файла с расширением: ")


def count_ing(arr):
    """Return how many words of *arr* end with 'ing'."""
    res = 0
    for word in arr:
        if word.endswith('ing'):
            res += 1
    return res


def count_form(arr, form):
    """Return how many words of *arr* equal *form*."""
    res = 0
    for word in arr:
        if word == form:
            res += 1
    return res


def main():
    """Report the -ing count and the count of a user-chosen form."""
    textik = done_text(get_fname())
    print('Всего в тексте {0} форм на -ing'.format(count_ing(textik)))
    form = input('Введите форму, количество вхождений которой хотите найти: ')
    print('Эта форма встречается {0} раз'.format(count_form(textik, form)))


main()


# --- Script: extract the word following '>Столица' from a file ---
import re


def search_inf(fname):
    """Print the first match in *fname* and save it to answer.txt."""
    with open(fname, 'r', encoding='UTF-8') as f:
        s = f.read()
    regex = '>Столица.*?([А-Яа-я]+(-[А-Яа-я]+)*)'
    res = re.search(regex, s, re.DOTALL)
    if res:
        with open('answer.txt', 'w', encoding='UTF-8') as k:
            print(res.group(1))
            k.write(res.group(1))


def main():
    """Ask for a file name and run the search."""
    search_inf(input('Введите имя файла: '))


if __name__ == '__main__':
    main()


# --- Script: replace forms of 'комар' with 'слон' ---
import re


def change_text():
    """Read mosq1.txt, substitute the stems and write antwort.txt."""
    with open('mosq1.txt', 'r', encoding='UTF-8') as f:
        s = f.read()
    s1 = re.sub('Комар(»| |а|ы|у|ом|е|ов|ам|ами|ах)', 'Слон\\1', s)
    s1 = re.sub('комар(»| |а|ы|у|ом|е|ов|ам|ами|ах)', 'слон\\1', s1)
    with open('antwort.txt', 'w', encoding='UTF-8') as f:
        f.write(s1)


def main():
    """Run the replacement."""
    change_text()


if __name__ == '__main__':
    main()


# --- Script: random three-line poem built from word-list files ---
import random


def generate_adj():
    """Return a random whitespace-separated token of adj.txt."""
    with open('adj.txt', 'r') as f:
        s = f.read().split()
    return random.choice(s)


def generate_noun(num):
    """Return a random noun: singular list for 'sg', plural list otherwise."""
    if num == 'sg':
        f_name = 'noun_sg.txt'
    else:
        f_name = 'noun_pl.txt'
    with open(f_name, 'r') as f:
        s = f.read().split()
    return random.choice(s)


def generate_verb():
    """Return a random whitespace-separated token of verbs.txt."""
    with open('verbs.txt', 'r') as f:
        s = f.read().split()
    return random.choice(s)


def generate_punct(pos):
    """Return a random mark: end_punct.txt for 'end', mid_punct.txt otherwise.

    A dash is returned with a leading space.
    """
    if pos == 'end':
        f_name = 'end_punct.txt'
    else:
        f_name = 'mid_punct.txt'
    with open(f_name, 'r') as f:
        s = f.read().split()
    punct = random.choice(s)
    if punct == '-':
        punct = ' ' + punct
    return punct


def generate_pronoun():
    """Return a random whitespace-separated token of pronouns.txt."""
    with open('pronouns.txt', 'r') as f:
        s = f.read().split()
    return random.choice(s)


def generate_intj():
    """Return a random line of intj.txt."""
    with open('intj.txt', 'r') as f:
        s = f.read().split('\n')
    return random.choice(s)


def generate_line(num):
    """Return line *num* of the poem (1, 2, or anything else for the last)."""
    if num == 1:
        return generate_adj() + ' ' + generate_noun('sg') + generate_punct('end') + '\n'
    elif num == 2:
        return generate_verb() + ' ' + generate_noun('pl') + ' и' + '\n'
    else:
        return generate_pronoun() + generate_punct('mid') + ' ' + generate_intj() + generate_punct('end') + '\n'


def generate_haiku():
    """Return the three generated lines joined together."""
    return generate_line(1) + generate_line(2) + generate_line(3)


print(generate_haiku())


# --- Script: list words of 7+ letters per sentence of tolstoy.txt ---
import re


def done_text(fname):
    """Return the non-empty sentences of *fname* as lists of words."""
    with open(fname, 'r', encoding='UTF-8') as f:
        s = f.read().lower()
    rez1 = '(,|:|№|-|\*|/|\||\[|\]|{|}|\\|(|)|\'|"|[0-9]|«|»|>|<|V|I|X)+'
    s = re.sub(rez1, ' ', s)
    rez = '\.|\?|!|\.\.\.'
    s = re.split(rez, s)
    # The original popped empty sentences while enumerating the same list,
    # so the element after each removed one was skipped and left unsplit.
    sentences = []
    for sent in s:
        words = sent.split()
        if words:
            sentences.append(words)
    return sentences


def count_letters(arr):
    """Write each word of 7+ letters with its sentence number and length."""
    mlist = [(indx + 1, word, len(word))
             for indx, senten in enumerate(arr)
             for word in senten if len(word) >= 7]
    with open('answer_sheet12.txt', 'w', encoding='UTF-8') as f:
        for k in mlist:
            f.write('предложение {0}, {1}-------{2}\n'.format(k[0], k[1], k[2]))


def main():
    """Process tolstoy.txt."""
    count_letters(done_text('tolstoy.txt'))


if __name__ == '__main__':
    main()


# --- Script: directories with digits in the name, plus all distinct names ---
import os
import re


def count_dirs():
    """Return the directories in '.' whose name contains a digit."""
    res = '[0-9]'
    arr = [thing for thing in os.listdir('.')
           if os.path.isdir(thing) and len(re.findall(res, thing))]
    return arr


def print_answer(arr):
    """Write the directory count and the distinct names to answer_sheet13.txt.

    For files, everything from the first dot onwards is cut off the name.
    """
    with open('answer_sheet13.txt', 'w', encoding='UTF-8') as fout:
        fout.write('Всего папок с цифрами в названии - {0}.'.format(str(len(arr))))
        fout.write('Все имена в директории (без повторений):\n')
        clear_names = []
        for thing in os.listdir('.'):
            temp = thing
            if os.path.isfile(thing):
                temp = re.sub('\..+', '', thing)
            if temp not in clear_names:
                clear_names.append(temp)
        for nme in clear_names:
            if nme:
                fout.write(nme + '\n')


def main():
    """Collect the directories and write the answer file."""
    print_answer(count_dirs())


if __name__ == '__main__':
    main()


# --- Script: depth of the deepest directory below '.' ---
import os


def count_dep():
    """Write the maximum directory depth below '.' to answer_sheet14.txt."""
    count = 0
    for root, dirs, files in os.walk('.', topdown=False):
        if len(root.split(os.sep)) - 1 > count:
            count = len(root.split(os.sep)) - 1
    with open('answer_sheet14.txt', 'w', encoding='UTF-8') as answer:
        answer.write(str(count))


def main():
    """Run the depth count."""
    count_dep()


if __name__ == '__main__':
    main()


# --- Script: print ever-shorter prefixes of each entered string ---
while True:
    s = input("Введите строку: ")
    if s == "":
        break
    for indx, part in enumerate(s):
        print(s[:len(s) - indx])


# --- Script: the same for a single string ---
s = input("Введите строку: ")
for indx, part in enumerate(s):
    print(s[:len(s) - indx])


# --- Script: distinct forms of the verb 'открыть' found in a file ---
import re


def ask_name():
    """Ask the user for a file name."""
    return input('Введите имя файла с расширением: ')


def get_words():
    """Return the tokens of the chosen file, lowercased and stripped of
    surrounding punctuation and digits."""
    # ``with`` closes the file; the original ``f.close`` was never called.
    with open(ask_name(), 'r', encoding='UTF-8') as f:
        s = f.read().split()
    for indx, word in enumerate(s):
        s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><')
    return s


# NOTE: the body of count_words continues after this section, so its header
# line is kept byte-for-byte as the last line below.
def count_words(words): regex = 'откр(ы|о)((т(ый|ая|ое|ые|ого|ой|ых|ому|ым|ую|ом|ою)|в(ш(ий?|ая|ее|ие|его|ей|их|ему|им|ую|ею))?)|(л(а|о|и)?)|(й(те)?)|(ют?|е(шь|м|те?)))(ся|сь)?' 
wlist = [] for word in words: if re.fullmatch(regex, word): if word not in wlist: wlist.append(word) return wlist def main(): print(', '.join(count_words(get_words()))) main()word = input('Введите слово: ') if word == '': print ('Слово не введено') word2 = '' for i in range(len(word)): for k in range(len(word)): if k + i < len(word): word2 += word[k + i] else: word2 += word[k + i - len(word)] print (word2) word2 = '' import re def open_and_edit(): f = open("verbs.txt", 'r', encoding = "utf-8") s = f.read() f.close() s1 = s.lower() a = s1.split() for i, word in enumerate(a): a[i] = word.strip('.,!?();:*/\|<>-_%& return a def find_and_print(a): arr = [] for word in a: if re.search('^программир((у(ю(т|щ(и(й|ми?|е|х)|е(го|му?|й)|ая|ую))?|я|е(шь|те?))|ова(л(а|и)?|ть))(с(я|ь))?|уем(ы(й|ми?|е|х)?|о(го|му?|й)|ая?|ую))', word): if word not in arr: arr.append(word) for verb in arr: print(verb) def main(): text = open_and_edit() find_and_print(text) main() import re import os def task1(): for root, dirs, files in os.walk('.\\news'): s = '' for f in files: file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251") text = file.readlines() words = 0 for line in text: reg = '' r = re.search(reg,line) if r: words += 1 s += f + '\t' + str(words) + '\n' f2 = open("words_in_files.txt", 'w', encoding = "utf-8") f2.write(s) def task2(): for root, dirs, files in os.walk('.\\news'): s = '' for f in files: file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251") text = file.readlines() author = '' date = '' for line in text: reg_author = 'content="([ |(а-яА-яa-zA-Z)]+)" name="author"' reg_date = 'content="([0-9]+\.[0-9]+\.[0-9]+)" name="created"' r1 = re.search(reg_author, line) if r1: author = r1.group(1) r2 = re.search(reg_date, line) if r2: date = r2.group(1) s += f + '\t' + author + '\t' + date + '\n' f3 = open("words_in_files.csv", 'w', encoding = "utf-8") f3.write(s) def task3(): for root, dirs, files in os.walk('.\\news'): s = '' for f in files: file = 
open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251") text = file.readlines() for i, line in enumerate(text): reg_adj = 'A=.+gen.+>?' reg_sumj = 'S,.+gen.+>?' reg_word = '([а-яА-Я]+|`)\n' r1 = re.search(reg_adj, line) if r1: r2 = re.search(reg_word, line) word1 = r2.group(1) r3 = re.search(reg_subj, text[i+1]) if r3: word2 = r3.group(1) def main(): task1() task2() main() import re def open_and_edit(): f = open("linguistics.txt", 'r', encoding = "utf-8") s = f.read() f.close() return s def replace_and_output(s): s1 = re.sub('язык([а-я]{,3}( |\.|,|\)))','шашлык\\1', s) s2 = re.sub('Язык([а-я]{,3}( |\.|,|\)))','Шашлык\\1', s1) f = open("shashlyk.txt", 'w', encoding = "utf-8") f.write(s2) print('Текст записан в файл shashlyk.txt') f.close() def main(): text = open_and_edit() replace_and_output(text) main() import os import re def count_folders(): result = 0 for f in os.listdir('.'): if os.path.isdir(f): if re.search('^([а-яА-Я]| )+$',f): result += 1 print('Найдено папок:',result) def print_names(): names = {} file_name = '^(.+)(\.[a-z]+)$' for f in os.listdir('.'): if os.path.isdir(f): if f not in names: names[f] = 1 if os.path.isfile(f): r = re.search(file_name,f) if r: name = r.group(1) if name not in names: names[name] = 1 for name in sorted(names): print(name) def main(): count_folders() print_names() main() def read_file(): f = open("words.csv", 'r', encoding = "utf-8") a = f.readlines() f.close() return(a) def make_dict(a): words = {} for line in a: a2 = line.split(';') for i, h in enumerate(a2): a2[i] = h.strip() words[a2[1]] = a2[0] return words def guess(dic): for noun in dic: print(dic[noun], '...') attempt = 0 while attempt != len(dic[noun]): print('Осталось попыток: ', len(dic[noun]) - attempt ) attempt += 1 if input() == noun: print('Маладэц!') attempt = len(dic[noun]) elif len(dic[noun]) - attempt == 0: print('Не угадал :(') def main(): text = read_file() words = make_dict(text) print(words) guess(words) main() import re def open_and_edit(): f = 
open("hse.html", 'r', encoding = "utf-8") s = f.read() f.close() return s def find_and_print(s): reg1 = 'Преподаватели.*?\n.*?\n

[0-9]+ ?[0-9]+' reg2 = 'Преподаватели.*?\n.*?\n

' res1 = re.findall(reg1,s) res2 = re.findall(reg2,s) number = res1[0].replace(res2[0], '') print('Число преподавателей:',number) f = open("found_number.txt", 'w', encoding = "utf-8") f.write(number) f.close() def main(): text = open_and_edit() find_and_print(text) main() words = [] while True: newword = input('Введите слово: ') if newword == '': break else: words.append(newword) for i in range(len(words)): string = words[i] if (i+1) >= len(string): print('В этом слове не осталось символов') else: print(string[i+1:]) a = input ('Введите число a: ') b = input ('Введите число b: ') c = input ('Введите число c: ') a = int (a) b = int (b) c = int (c) if a % b == c: print ('a дает остаток c при делении на b') else: print ('a не дает остаток c при делении на b') if a * c + b == 0: print ('c является решением линейного уравнения ax + b = 0') else: print ('c не является решением линейного уравнения ax + b = 0') import random def open_file(): f = open("words.txt", 'r', encoding = "utf-8") text = f.readlines() f.close() return(text) def find_words(word,text): for i in range(len(text)): line = [] line = text[i].split() for l, w in enumerate(line): line[l] = w.strip('.,!?();:*/\|<>-_%& if line[0] == word: words = [] for j in range(len(line)): if j > 0: words.append(line[j]) return(words) def noun(): find = 'существительное' nouns = find_words(find, text) return random.choice(nouns) def imperative(): find = 'императив' imper = find_words(find, text) return random.choice(imper) def adverb(imp): find = 'наречие' adverbs = find_words(find, text) return random.choice(adverbs) + ' ' + imp def verb(): find = 'глагол' verbs = find_words(find, text) return random.choice(verbs) def adjective(): find = 'прилагательное' adj = find_words(find, text) return random.choice(adj) def question_word(): find = 'вопрос' quest = find_words(find, text) return random.choice(quest) def pos_sentence(): sentence = adjective() + ' ' + noun() + ' ' + verb() +\ ' ' + adjective() + ' ' + noun() + '.' 
sentence = sentence.capitalize() return(sentence) def neg_sentence(): sentence = adjective() + ' ' + noun() + ' не ' + verb() +\ ' ' + adjective() + ' ' + noun() + '.' sentence = sentence.capitalize() return(sentence) def quest_sentence(): sentence = question_word()+ ' ' + adjective() + ' ' + noun() +\ ' ' + verb() + ' ' + adjective() + ' ' + noun() + '?' sentence = sentence.capitalize() return(sentence) def imper_sentence(): sentence = adverb(imperative()) + ' ' + noun() + '!' sentence = sentence.capitalize() return(sentence) def if_sentence(): sentence = 'если бы ' + noun() + ' ' + verb() + ' ' + noun() +\ ', то ' + noun() + ' ' + verb() + ' бы ' + noun() + '.' sentence = sentence.capitalize() return(sentence) def random_print(): spisok = [pos_sentence(), neg_sentence(), quest_sentence(),\ imper_sentence(), if_sentence()] random.shuffle(spisok) for i in range(len(spisok)): print(spisok[i], end = ' ') text = open_file() random_print() word = input ('Введите слово: ') indx = len(word)-1 while indx >= 0: if (word[indx]!= 'я') & (word[indx]!= 'з') : print (word[indx]) indx -= 1 import os import re def extensions(): ext_count = {} for root, dirs, files in os.walk('.'): for file in files: ext = re.findall('\.[a-z0-9A-Z]+$', file) if ext[0] not in ext_count: ext_count[ext[0]] = 1 else: ext_count[ext[0]] += 1 numb = 0 found_ext = '' for ext in ext_count: if ext_count[ext] > numb: numb = ext_count[ext] found_ext = ext print(found_ext) def main(): extensions() main() import re def open_and_read(): f = open("animal_farm.txt", 'r', encoding = "utf-8") s = f.read() f.close() return s def read_sentences(s): s1 = re.sub('[a-z](\.|!|\?)','\\1@@', s) a = s1.split('@@') return a def split_and_count(a): for i in range(len(a)): words = a[i].split() words2 = [words[j].strip('.,!?();:*/\|<>-_%& for word in range(len(words2)): print('%s_%s' %(words2[word], len(words2[word]))) def main(): text = open_and_read() sent = read_sentences(text) split_and_count(sent) main() f = open("1.txt", 
'r', encoding = "utf-8") word1 = 0 word3 = 0 for line in f: words = line.split() for i in range(len(words)): if len(words[i]) == 3: word3 += 1 elif len(words[i]) == 1: word1 += 1 words = [] if word1 == 0: print("Нет слов длинны 1") else: print(float(word3)/float(word1)) def open_and_edit(): name = input('Введите название файла: ') f = open(name, 'r', encoding = "utf-8") s = f.read() f.close() s1 = s.lower() a = s1.split() for i, word in enumerate(a): a[i] = word.strip('.,!?();:*/\|<>-_%& return a def find_in_text(t): hood = [] for word in t: if word.endswith('hood'): hood.append(word) print('В тексте нашлось ', len(hood), ' существительных с суффиксом -hood') return hood def short_list(arr): short = [] arr2 = [] for k in arr: arr2.append(k) for i in range(len(arr2)-1): if arr2[i]: short.append(arr2[i]) x = 1 for j in range(i+1, len(arr2)): if arr2[i]: if arr2[i] == arr2[j]: x += 1 arr2[j] = [] short.append(x) return short def min_freq(arr): short = short_list(arr) min = short[1] index = 1 for k in range(1, len(short), 2): if short[k] < min: index = k min = short[k] print('Минимальную частотность имеет существительное', short[index-1]) def print_nouns(arr): nouns = [] short = short_list(arr) for word in short: if type(word) != int: nouns.append(word.replace('hood', '')) all_nouns = ', '.join(nouns) print('Найденный слова образованы от существительных ', all_nouns) def main(): text = open_and_edit() found = find_in_text(text) min_freq(found) print_nouns(found) main() f = open("aphor.txt", 'r', encoding = "utf-8") a = f.readlines() f.close() for i in range(len(a)): words = [] words = a[i].split() numb = 0 for j in range(len(words)): if words[j] != '—': numb += 1 if numb < 16: print(a[i]) author = [] um = 0 for i in range(len(a)): words = [] words = a[i].split() for l, word in enumerate(words): words[l] = word.strip('.,!?();:*/\|<>-_%& for j in range(len(words)): if words[j] == 'ум': um += 1 povtor = 0 for k in range(len(author)): if author[k] == words[len(words)-1]: 
povtor += 1 if povtor == 0: author.append(words[len(words)-1]) out = '' out = ', '.join(author) print('Количество цитат = ', um) print('Источники: ', out) inp_words = [] while True: newword = input('Введите слово: ') if newword == '': break else: inp_words.append(newword) for j in range(len(inp_words)): found = 0 print(inp_words[j]) for i in range(len(a)): words = [] words = a[i].split() for l, word in enumerate(words): words[l] = word.strip('.,!?();:*/\|<>-_%& for k in range(len(words)): if inp_words[j] == words[k]: print(a[i]) found += 1 break if found == 0: print('Цитата с этим словом не найдена') import re def open_file(): f = open("file.txt", 'r', encoding = "utf-8") a = [] for line in f: a.append(line) f.close() return a def open_new_file(): f = open("created_file.txt", 'w', encoding = "utf-8") return f def write_lines_number(a,f): i = 0 for line in a: i += 1 f.write(str(i)) f.write('\n') def create_dictionary(a,f): dic = {} for line in a: if '= 5:\n')) while n < 5: n = int(input('Введённое число < 5. Пожалуйста, введите число >= 5:\n')) return n def func3(dic, n): for i in range(n): key = random.choice(list(dic.keys())) m = 3 print(i + 1, '-ое слово. ', 'Подсказка: ', random.choice(dic[key]), ' ...', sep = '') fl = 0 while fl != 1 and m != 0: print('Попыток осталось: ', m, sep = '') if input('Введите ниже ваш ответ:\n').lower() == key: fl = 1 print('Молодец! Всё верно!') else: print('Неверно. ', end = '') if m != 1: print('Ещё одна подсказка: ', random.choice(dic[key]), ' ...', sep = '') m -= 1 if fl == 0: print('Вы не угадали. 
Правильный ответ: ', key, sep = '') def main(): dic = func1(input('Введите, пожалуйста, название файла:\n')) n = func2() func3(dic, n) main() def func1(name): f = open(name, 'r', encoding = 'utf-8', errors = 'ignore') words = f.read().replace('\n', ' ').split() f.close() for i, word in enumerate(words): words[i] = word.lower().strip('.”“,/1234567890@ return words def func3(words, word): fl = 0 num = 0 while fl != 1: try: ind = words.index(word) except ValueError: fl = 1 continue num += 1 words.pop(ind) print(word, ': frequency = ', num, sep = '') return words def func2(words): prefix = 'omni' length = len(prefix) for word in words: if word.startswith(prefix) and length < len(word): words = func3(words, word) words = func3(words, word[length:]) print('-------------------------------------') def main(): func2(func1('file.txt')) main() def func1(name): f = open(name, 'r', encoding = 'utf-8', errors = 'ignore') words = f.read().replace('\n', ' ').split() f.close() for i, word in enumerate(words): words[i] = word.lower().strip('.”“,/1234567890@ return words def func3(words, word): num = 0 for elem in words: if elem == word: num += 1 print(word, ': frequency = ', num, sep = '') def func2(words): prefix = 'under' length = len(prefix) l = [] for word in words: if word.startswith(prefix) and length < len(word) and word not in l: func3(words, word) func3(words, word[length:]) print('-------------------------------------') l.append(word) def main(): func2(func1('file.txt')) main() import os def files_and_folders(): lst = os.listdir('.') files = [] folders = [] for f in lst: if os.path.isfile(f): files.append(f) else: folders.append(f) d_files = {} for f in files: f_name, f_ext = os.path.splitext(f) if f_name not in d_files: d_files[f_name] = 1 else: d_files[f_name] += 1 return d_files, folders def counting(d_files): num = 0 punct_marks = '.!?:;,-()"\'<>' for key in d_files: fl = 0 i = 0 while fl != 1 and i < len(punct_marks): if punct_marks[i] in key: fl = 1 i += 1 if fl == 
1: num += d_files[key] return num def output(num, d_files, d_folders): print('Количество файлов, названия которых содержит знаки препинания = ', num) print('Названия файлов и папок в данной папке следующие:') i = 1 for key in d_files: print('%s) %s' % (str(i), str(key))) i += 1 for key in d_folders: if key not in d_files: print('{}) {}'.format(str(i), str(key))) i += 1 def main(): d_files, folders = files_and_folders() num = counting(d_files) output(num, d_files, folders) if __name__ == '__main__': main() import os def walking(): num = 0 for root, dirs, files in os.walk('.'): d_files = {} flag = False for file in files: file_name, file_ext = os.path.splitext(file) if file_ext not in d_files: d_files[file_ext] = 1 else: flag = True break if flag: num += 1 return num def main(): num = walking() print('Количество папок, в которых встречаются несколько файлов с одним\ и тем же расширением = {}.'.format(num)) if __name__ == '__main__': main() import os def walking(): num = 0 for root, dirs, files in os.walk('.'): d_files = {} flag = False for file in files: file_name, file_ext = os.path.splitext(file) if file_ext not in d_files: d_files[file_ext] = 1 else: flag = True print(file) break if not flag: num += 1 return num def main(): num = walking() print('Количество папок, в которых встречаются несколько файлов с одним\ и тем же расширением = {}.'.format(num)) if __name__ == '__main__': main() import re def reading(name): f = open(name, 'r', encoding = 'utf-8') words = f.read().split(' ') f.close() return words def cleaning(words): for i, word in enumerate(words) : words[i] = word.lower().strip('.,/1234567890@ return words def printing(words): l = [] for word in words: if re.search('кот', word) and word not in l: l.append(word) print(word) def main(): words = reading(input('Введите, пожалуйста, название файла:\n')) words = cleaning(words) printing(words) if __name__ == '__main__': main() n = float(input('Введите любое число\n')) print('число | ', n) for i in range(9 + 
len(str(round(n * 10, 3)))) : print('-', end = '') print() for i in range(1,11) : if i != 10 : print(i, ' | ', round(i * n, 3), end = '\n') else : print(i, ' | ', round(i * n, 3), end = '\n') n = float(input('Введите любое число\n')) for i in range(1,11) : print(i, '*', n, '=', i * n, end = '\n') import re def reading(name): f = open(name, 'r', encoding = 'utf-8') lines = f.readlines() f.close() return lines def array(lines): text = ''.join(lines) text = re.sub('((.|\n)*)', '\\1', text) text = re.sub('<[wc](.*?)>(.*?)', '\\1 \\2', text) print(text) l = re.findall('lemma="(.*?)" type="(.*?)" (.*)', text) return l def recording1(d, n): f = open(input('Введите, пожалуйста, название выходного файла\n'), 'a', encoding = 'utf-8') f.write(str(n) + '\n') for key in d.keys(): f.write(key + '\n') f.close() def recording2(d): f = open(input('Введите, пожалуйста, название выходного файла\n'), 'a', encoding = 'utf-8') for key, value in d.items(): if re.search('l.f.*', key): f.write(key + ' - ' + str(value) + '\n') f.close() def recording3(l): name = input('Введите, пожалуйста, название выходного файла в формате csv\n') while not name.endswith('.csv'): name = input('Введите, пожалуйста, название выходного файла в формате csv\n') f = open(name, 'a', encoding = 'utf-8') for i, elem in enumerate(l): f.write(elem[0] + ',' + elem[1] + ',' + elem[2] + '\n') f.close() def dictionary(lines): d = {} for line in lines: r = re.search('lemma=".*" type="(.*)"', line) if r: key = r.group(1) if key in d: d[key] += 1 else: d[key] = 1 return d def main(): name = input('Введите, пожалуйста, название входного файла\n') lines = reading(name) n = len(lines) d = dictionary(lines) recording1(d, n) recording2(d) l = array(lines) recording3(l) if __name__== '__main__': main() import random def noun() : f = open('nouns.txt', 'r', encoding = 'utf-8') nouns = f.read().split() f.close() return random.choice(nouns) def personal_pronoun() : f = open('personal_pronouns.txt', 'r', encoding = 'utf-8') pronouns = 
f.read().split() f.close() return random.choice(pronouns) def adjective_before_noun() : f = open('adjectives_before_noun.txt', 'r', encoding = 'utf-8') adj = f.read().split() f.close() return random.choice(adj) def adjective_after_noun() : f = open('adjectives_after_noun.txt', 'r', encoding = 'utf-8') adj = f.read().split() f.close() return random.choice(adj) def adverb() : f = open('adverbs.txt', 'r', encoding = 'utf-8') adverbs = f.read().split() f.close() return random.choice(adverbs) def intensifier(adv): f = open('intensifiers.txt', 'r', encoding = 'utf-8') intensifiers = f.read().split() f.close() return random.choice(intensifiers) + ' ' + adv def transitive_infinitive() : f = open('transitive_infinitives.txt', 'r', encoding = 'utf-8') inf = f.read().split() f.close() return random.choice(inf) def intransitive_infinitive() : f = open('intransitive_infinitives.txt', 'r', encoding = 'utf-8') inf = f.read().split() f.close() return random.choice(inf) def temporary_marker() : f = open('temporary_markers.txt', 'r', encoding = 'utf-8') temporary_markers = f.read().split() f.close() return random.choice(temporary_markers) def interrogative() : f = open('interrogatives.txt', 'r', encoding = 'utf-8') interrogatives = f.read().split() f.close() return random.choice(interrogatives) def number() : f = open('numbers.txt', 'r', encoding = 'utf-8') numbers = f.read().split() f.close() return random.choice(numbers) def declension(noun, adjective, number) : f = open('declension_of_nouns.txt', 'r', encoding = 'utf-8') g = open('declension_of_adjectives.txt', 'r', encoding = 'utf-8') nouns = dict() adjectives = dict() for line in f.readlines() : s = line.split(' ', maxsplit = 1) nouns[s[0]] = s[1].split() for line in g.readlines() : s = line.split(' ', maxsplit = 1) adjectives[s[0]] = s[1].split() f.close() g.close() if nouns[noun][0] == 'm' and number == 'sg' : return noun, adjective, random.choice(['le', 'un']) elif nouns[noun][0] == 'm' and number == 'pl' : return 
nouns[noun][1], adjectives[adjective][1], random.choice(['les', 'des']) elif nouns[noun][0] == 'f' and number == 'sg' : return noun, adjectives[adjective][0], random.choice(['la', 'une']) elif nouns[noun][0] == 'f' and number == 'pl' : return nouns[noun][1], adjectives[adjective][2], random.choice(['les', 'des']) def collocation_bef(noun, adj_before_noun, article) : return article + ' ' + adj_before_noun + ' ' + noun def collocation_aft(noun, adj_after_noun, article) : return article + ' ' + noun + ' ' + adj_after_noun def conjugation(pronoun, infinitive) : f = open('conjugations.txt', 'r', encoding = 'utf-8') verbs = dict() for line in f.readlines() : s = line.split(' ', maxsplit = 1) verbs[s[0]] = s[1].split() f.close() if pronoun == 'je' : return verbs[infinitive][0] elif pronoun == 'tu' : return verbs[infinitive][1] elif pronoun == 'il' or pronoun == 'elle' : return verbs[infinitive][2] elif pronoun == 'nous' : return verbs[infinitive][3] elif pronoun == 'vous' : return verbs[infinitive][4] else : return verbs[infinitive][5] def affirmative_sequence(pronoun, verb) : if verb[0] in 'aàâeéèêiîoôuùûy' and pronoun == 'je' : return "j'" + verb else : return pronoun + ' ' + verb def interrogative_sequence(pronoun, verb) : if verb[len(verb) - 1] in 'aàâeéèêiîoôuùûy' and pronoun[0] in 'aàâeéèêiîoôuùûy' : return verb + '-t-' + pronoun else : return verb + '-' + pronoun def affirmative_sentence() : pron = personal_pronoun() noun1, adj1, art1 = declension(noun(), adjective_before_noun(), 'sg') noun2, adj2, art2 = declension(noun(), adjective_before_noun(), 'pl') return affirmative_sequence(pron, conjugation(pron, transitive_infinitive())) + ' ' + collocation_bef(noun1, adj1, art1) + ' et ' + number() + ' ' + adj2 + ' ' + noun2 + '.' def interrogative_sentence() : pron = personal_pronoun() return interrogative() + ' ' + interrogative_sequence(pron, conjugation(pron, intransitive_infinitive())) + ' ' + temporary_marker() + '?' 
def negative_sentence() : noun1, adj1, art1 = declension(noun(), adjective_before_noun(), 'pl') noun2, adj2, art2 = declension(noun(), adjective_before_noun(), 'sg') return collocation_aft(noun1, adj1, art1) + ' ne ' + conjugation('elle', transitive_infinitive()) + ' pas ' + collocation_bef(noun2, adj2, art2) + ' ' + temporary_marker() + ' ' + intensifier(adverb()) + '.' def conditional_sentence() : return '[Здесь должно быть условное предложение, но я пока не представляю, как оно устроено во французском :( ].' def imperative_sentence() : return 'ne ' + conjugation('vous', intransitive_infinitive()) + ' pas' +'!' def random_sentence(n) : if n == 1 : return affirmative_sentence() elif n == 2 : return interrogative_sentence() elif n == 3 : return negative_sentence() elif n == 4 : return conditional_sentence() else : return imperative_sentence() def text_print() : a = set('12345') for n in a : print(random_sentence(int(n)).capitalize(), end = ' ') text_print() import re def reading(name): f = open(name, 'r', encoding = 'utf-8') words = f.read().replace('\n', ' ').split() f.close() return words def cleaning(words): for i, word in enumerate(words) : words[i] = word.lower().strip('.,/1234567890@ return words def printing(words): for word in words: if re.search('загру(з(ят(ся)?|и(шь(ся)?|(сь)?|м(ся)?|л((ся)?|а(сь)?|и(сь)?|о(сь)?)|т((ся)?|е(сь)?|ь(ся)?)|в(ш(ую(ся)?|ая(ся)?|е(го(ся)?|му?(ся)?|й(ся)?|е(ся)?|ю(ся)?)|и((сь)?|й(ся)?|м(и)?(ся)?|е(ся)?|х(ся)?)))?))|ж(у(сь)?|ен(а|о|ы)?|ён|(е|ё)нн(ая|ую|о(м(у)?|ю|е|го|й)|ы(м(и)?|й|е|х))))$', word): print(word) def main(): words = cleaning(reading(input('Введите, пожалуйста, название файла:\n'))) printing(words) main() import re def reading(name): f = open(name, 'r', encoding = 'utf-8') text = f.read() f.close() return text def find(text): r = re.search('\= 0 : bigw += 1 else : j = 0 while j < len(l[i]) and alph.find(l[i][j]) == -1 : j += 1 if j == len(l[i]) : allw -= 1 elif alphUP.find(l[i][j]) >= 0 : bigw += 1 if allw != 0 : 
print('The percentage of words, which start with uppercase equals to ', round(bigw / allw * 100, 3), '%', sep = '') else : if fl == 0 : print('There are no words at all! Try to use another file.') else : print('There are some symbols, but no words in Russian. Try to use another file!') f.close() fl = 0 while fl != 1 : word = input('Please input one word:\n') ind = word.find(' ') if ind == -1 : fl = 1 else : if ind == 0 : word = word[1:] flag = word.find(' ') while flag == 0 : word = word[1:] flag = word.find(' ') if flag > 0 : subword = word[flag:] ind = subword.find(' ') while ind == 0 : subword = subword[1:] ind = subword.find(' ') if subword != '' : print('There is more than one word. Please try again!') else : word = word[:flag] fl = 1 else : if word != '' : fl = 1 else : print("You didn't type any word! Please try again!") else : subword = word[ind:] flag = subword.find(' ') while flag == 0 : subword = subword[1:] flag = subword.find(' ') if subword != '' : print('There is more than one word. Please try again!') else : word = word[:ind] fl = 1 for i in range(len(word)) : print(word[i:]) fl = 0 while fl != 1 : word = input('Please input one word:\n') word = word.strip() ind = word.find(' ') if ind == -1 : if word != '' : fl = 1 else : print("You didn't type any word! Please try again") else : print('There is more than one word. Please try again!') for i in range(len(word)) : print(word[i:]) a = float(input('enter the first number\n')) b = float(input('enter the second number\n')) c = float(input('enter the third number\n')) if b == 0. : print('you can\'t divide by zero') elif a % b == c and a / b == c : print('YES') else : print('NO') a = float(input('enter the first number\n')) b = float(input('enter the second number\n')) c = float(input('enter the third number\n')) if b == 0. 
: print('you can\'t divide by zero') else : if a % b == c : print('YES, a % b == c') else : print('NO, a % b != c') if a / b == c : print('YES, a / b == c') else : print('NO, a / b != c') a = int(input('enter the first number\n')) b = int(input('enter the second number\n')) c = int(input('enter the third number\n')) if b == 0 : print('you can\'t divide by zero') elif a % b == c and a / b == c : print('YES') else : print('NO') a = int(input('enter the first number\n')) b = int(input('enter the second number\n')) c = int(input('enter the third number\n')) if b == 0 : print('you can\'t divide by zero') else : if a % b == c : print('YES, a % b == c') else : print('NO, a % b != c') if a / b == c : print('YES, a / b == c') else : print('NO, a / b != c') import re import os import csv def printing(d1, d2, arr): f = open('output1.txt', 'w', encoding = 'cp1251') for key, value in sorted(d1.items()): f.write(key + '\t' + str(value) + '\n') f.close() with open('output2.csv', 'w', encoding = 'cp1251') as csv_file: writer = csv.writer(csv_file, delimiter = ';') writer.writerow(['Название файла', 'Автор', 'Дата создания текста']) for key, value in sorted(d2.items()): lst = [str(key), str(value[0]), str(value[1])] writer.writerow(lst) f = open('output3.txt', 'w', encoding = 'cp1251') for elem in arr: f.write(elem + '\n') f.close() def dictionary(name): d1 = {} d2 = {} arr = [] for file in os.listdir(name): with open(os.path.join(name, file), 'r', encoding = 'cp1251') as text: text = text.read() a = re.findall('(.*?)', text) d1[file] = len(a) b = re.findall('<.*?>(.*?)([\s,.!123456790:;?""])', text) words = [words_punct[i][0] for i in range(len(words_punct))] puncts = [words_punct[i][1] for i in range(len(words_punct))] d = re.findall('gr="A.*?gen.*?>(.*?)\s.*?gr="S.*?gen.*?>(.*?)', text) e = [] for i, elem in enumerate(d): ind1 = words.index(elem[0]) ind2 = words.index(elem[1]) if ind2 - ind1 == 1: t = ind1 - 1 while t >= 0 and puncts[t] not in '[.?!]': t -= 1 k = ind2 while k <= 
len(words) - 1 and puncts[k] not in '[.?!]': k += 1 s = '' for p in range(t + 1, k): if p != ind1 and p != ind2: s += words[p] + puncts[p] elif p == ind1: s += '\t' + words[p] + puncts[p] else: s += words[p] + puncts[p] + 't' e.append(s) arr.extend(e) return d1, d2, arr def main(): d1, d2, arr = dictionary('news') printing(d1, d2, arr) if __name__ == '__main__': main() import re def reading(): f = open('input.txt', 'r', encoding = 'utf-8') text = f.read() f.close() text = text.replace('...', '.') text = text.replace('—', '') text = re.sub('[\.!\?]([а-яa-z])', ' \\1', text) text = re.sub('[\.!\?]\)?»? ?«?\(?([а-яa-z])', ' \\1', text) text = re.sub('\.([A-ZА-Я])', ' \\1', text) text = re.sub('([A-ZА-Я])\. ([A-ZА-Я])', '\\1 \\2', text) sentences = re.split(r'[.!?]', text) sentences = [' '.join([word.strip('» «\n:<>\'"@ return sentences def output(sentences): f = open('output.txt', 'a', encoding = 'utf-8') for sentence in sentences: if len(sentence.split()) > 10: s = 0 for word in sentence.split(): s += len(word) f.write('"{}": это предложение со словами длины {:.1f}\n'.format(sentence, s/len(sentence.split()))) f.close() def main(): sentences = reading() output(sentences) if __name__ == '__main__': main() print('Введите 7 целых чисел') arr = [] for i in range(1, 8) : print('Введите ', i, '-ое целое число', sep = '') arr.append(int(input())) for i in range(7) : for j in range(arr[i]) : print('X', end = '') print() import re def reading(name): f = open(name, 'r', encoding = 'utf-8') text = f.read() f.close() return text def correction(text): corrected_text = re.sub('(Ф|ф)инлянди( |я(х|(ми?))?|и|й|ю|е(й|ю))', '\\1@алайзи\\2', text) corrected_text = re.sub('ФИНЛЯНДИ( |Я(Х|(МИ?))?|И|Й|Ю|Е(Й|Ю))', 'МАЛАЙЗИ\\1', corrected_text) corrected_text = corrected_text.replace('Ф@', 'М') corrected_text = corrected_text.replace('ф@', 'м') return corrected_text def recording(text): f = open(input('Введите, пожалуйста, название файла вывода:\n'), 'w', encoding = 'utf-8') f.write(text) 
f.close() def main(): text = reading(input('Введите, пожалуйста, название файла ввода:\n')) corrected_text = correction(text) recording(corrected_text) if __name__ == '__main__': main() import re def open_text(way_to_file): with open(way_to_file, 'r', encoding = 'utf-8') as f: text = f.read() return text def search(text): m = re.findall('\\bдинозавр(a(ми|х)?|у|о(м|в)|е|ы)', text) return m def tags(text): m = re.sub('<.*?>', '', text, flags = re.DOTALL) return m def replace(text): a = re.sub('\\bдинозавр', 'кот', text, flags = re.DOTALL) return a def images(text): n = re.sub('(а|е|ё|и|оуэюя)') fname = input() txt = open_text(fname) res = replace(txt) print(res) import re import os def auth(direct): d = {} for root, dirs, files in os.walk(direct): for file in files: with open(os.path.join(direct, file)) as f: text = f.read() regex1 = 'content=".*" name="author"' a = re.findall(regex1, text) for elem in a: b = re.sub('content="', '', elem) c = re.sub('" name="author"', '', b) d[file] = c return d def topic(direct): d = {} for root, dirs, files in os.walk(direct): for file in files: with open(os.path.join(direct, file)) as f: text = f.read() regex1 = 'content=".*" name="topic"' a = re.findall(regex1, text) for elem in a: b = re.sub('content="', '', elem) c = re.sub('" name="topic"', '', b) d[file] = c return d def main(): direct = './news' d1 = auth(direct) d2 = topic(direct) with open('./15.csv', 'w', encoding='utf-8') as f: for key in d1.keys(): f.write('\n{}'.format(key)) f.write(' {} '.format(d1[key])) f.write('{}'.format(d2[key])) main() def open_text(way_to_file): with open(way_to_file, 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() arr = text.split() for index, elem in enumerate(arr): arr[index] = elem.strip(',.;:!?\n ') return arr def first_letter(letter, way_to_file): arr = open_text(way_to_file) array = [] for elem in arr: if elem[0] == letter: array.append(elem) return array def questions(): letter = input() fname = input() result = 
first_letter(letter, fname) return result result = questions() print(result) import re import os def number_sent(direct): d = {} for root, dirs, files in os.walk(direct): for file in files: with open(os.path.join(direct, file)) as f: text = f.read() regex = '' arr = re.findall(regex, text) d[file]=len(arr) return d def main(): direct = './news' d = number_sent(direct) with open('./11.txt', 'w', encoding='utf-8') as f: for key in d.keys(): f.write('\n{} {}'.format(key, d[key])) main() import random def imperative(): imperative = ["прокати", "уходи", "не спеши", "погоди", "подожди", "позвони", "убегай", "не плошай", "подержи"] return random.choice(imperative) def verb(): plural_verbs = ["привезут", "принесли", "принесут", "пожуют", "погрызут", "приплетут", "приведут", "привели"] return random.choice(plural_verbs) def noun_phrase(): clitics = ["по", "ни", "на", "хоть", "лишь", "вот", "не", "от", "за", "пусть"] clitic = random.choice(clitics) words2 = ["себе", "тебе", "земля", "игра", "звезда", "мороз", "ответ", "превед", "футбол", "печаль", "бокал"] noun = random.choice(words2) return clitic + ' ' + noun def noun(number): singular_nouns = ["монолог", "коридор", "почему", "потому", "отчего", "каратэ", "кабарэ", "курага", "кандидат"] plural_nouns = ["малыши", "рукава", "камыши", "табуны", "рюкзаки", "пиджаки", "пацаны", "чуваки"] if number == 's': return random.choice(singular_nouns) return random.choice(plural_nouns) def punctuation(): marks = [".", "?", "!", "..."] return random.choice(marks) def verse1(): return noun('pl') + ' ' + verb() + ' ' + noun('pl') + punctuation() def verse2(): return imperative() + ' ' + noun('s') + ' ' + noun_phrase() + punctuation() def verse3(): return noun_phrase() + ' ' + verb() + ' ' + noun('pl') + punctuation() def make_verse(): verse = random.choice([1,2,3]) if verse == 1: return verse1() elif verse == 2: return verse2() else: return verse3() for n in range(4): print(make_verse()) word=input() for index, elem in enumerate(word): if 
(index + 1) % 2 ==1: if elem in 'пое': print(elem) else: continue else: continue message=input('Введите слово или сообщение: ') result='' for letter in message: result += letter print(result) import re def open_text(way_to_file): with open(way_to_file, 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() arr = text.split() for index, elem in enumerate(arr): arr[index] = elem.strip(',.;:!?\n ') return arr def main(): reglex = 'на(й(ти|д(я|ут?|((е|ё)(шь|т|м|те)|ен(а|о|ы)?)))|ш((е|ё)л|л(а|о|и))|шедш(е(е|й|го|му?)|ая|ую|и(й|е|х|ми?))|йденн(о(е|го|ому?)|ая|ой|ую|ы(й|е|х|ми?))(с(ь|я))?)' fname = input() arr = open_text(fname) array = [] for elem in arr: m = re.search(reglex, elem) if m != None: if elem not in array: array.append(elem) return array result = main() print(result) import random def organising_array(way_to_file): f = open(way_to_file, 'r', encoding = 'utf-8') file = f.read() arr = file.split('\n') return arr def noun_phrase(): adjectives = organising_array('./1.txt') adjective = random.choice(adjectives) nouns = organising_array('./2.txt') noun = random.choice(nouns) return adjective + ' ' + noun def clause(): clauses = organising_array('./3.txt') return random.choice(clauses) def adverb(): adverbs = organising_array('./4.txt') return random.choice(adverbs) def clause2(): clitics = organising_array('./5.txt') clitic = random.choice(clitics) pronouns = organising_array('./6.txt') pronoun = random.choice(pronouns) verbs = organising_array('./7.txt') verb = random.choice(verbs) return clitic + ' ' + pronoun + ' ' + verb def objects(): objects = organising_array('./8.txt') return random.choice(objects) def patient(): patients = organising_array('./9.txt') return random.choice(patients) def verb(): verbs = organising_array('./10.txt') return random.choice(verbs) def praep_phrase(): praeps = organising_array('./11.txt') praep = random.choice(praeps) nouns = organising_array('./12.txt') noun = random.choice(nouns) return praep + ' ' + noun def 
adjective(): adjectives = organising_array('./13.txt') return random.choice(adjectives) def punctuation(): marks = organising_array('./14.txt') return random.choice(marks) def verse1(): return noun_phrase()+ punctuation() + ' ' + clause() + punctuation() def verse2(): return adverb() + ' ' + clause2() + ' ' + objects() + punctuation() def verse3(): return patient() + ' ' + verb() + ' ' + praep_phrase() + ' ' + adjective() + punctuation() def make_verse(): verse = random.choice([1,2,3]) if verse == 1: return verse1() elif verse == 2: return verse2() else: return verse3() for n in range(4): print(make_verse()) import re def open_text(way): with open(way, 'r', encoding = 'utf-8') as f: text = f.read() text = text.replace(',', '') text = text.replace('.', '') text = text.replace(':', '') text = text.replace(';', '') text = text.replace('!', '') text = text.replace('?', '') text = text.replace('-', '') text = text.replace('"', '') text = text.replace('(', '') text = text.replace(')', '') return text def search(text): regex = '\\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+аго\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b' m = re.findall(regex, text, flags = re.DOTALL) return m def write(fname, m): with open(fname, 'a', encoding = 'utf-8') as f: for elem in m: f.write(elem) f.write('\n') way = input() fname = input() text = open_text(way) m = search(text) write(fname, m) def open_text(way): with open(way, 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() arr = text.split() for index, elem in enumerate(arr): arr[index] = elem.strip(',.;:!?-') return arr def main(): fname = input() arr = open_text(fname) n = len(arr) return n res = main() print('В файле содержится ', res, ' слов') def open_text(way): with open(way, 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() arr = text.split() for index, elem in enumerate(arr): arr[index] = elem.strip(',.;:!?-"') return arr def freq_list(arr): d = {} for elem in arr: if elem not in d.keys(): d[elem] = 1 
else: d[elem] = d[elem] + 1 return d def sort(d): array = [] for elem in d.keys(): array.append(elem) arr = [] for i in range(len(array)): temp = array[i] for index, elem in enumerate(array): if elem < temp: t = temp temp = elem array[index] = t if temp not in arr: arr.append(temp) return arr def write(fname, d, arr): with open(fname, 'a', encoding = 'utf-8') as f: for elem in arr: f.write(elem) f.write(',') f.write(str(d[elem])) f.write('\n') fname = input() fname2 = input() arr = open_text(fname) d = freq_list(arr) a = sort(d) write(fname2, d, a) import re def open_text(way): with open(way, 'r', encoding = 'utf-8') as f: text = f.read() text = re.sub(',.:;!?-"()\n', '', text) arr = text.split() return arr def search(arr): result = [] regex = '\\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+аго\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b' for i in range(len(arr) - 7): current = ' '.join(arr[i:i+7]) m = re.search(regex, current) if m is not None: result.append(current) return result def write(fname, m): with open(fname, 'a', encoding = 'utf-8') as f: for elem in m: f.write(elem) f.write('\n') way = input() fname = input() text = open_text(way) m = search(text) write(fname, m) import random def opening_csv(way_to_file): with open(way_to_file, 'r', encoding = 'utf-8') as f: text = f.read() arr = text.split('\n') d = {} for elem in arr: array = elem.split(',') d[array[0]] = array[1] return d def random_key(d): array = [] for elem in d.keys(): array.append(elem) word = random.choice(array) return word def main(): way_to_file = input() vocabul = opening_csv(way_to_file) word = random_key(vocabul) print(word) for i in range(len(vocabul[word])): print('_', end = ' ') solve = input() if solve == vocabul[word]: result = 'WIN!!!' 
else: result = 'FAIL(((' return result res = main() print(res) import re def open_text(way_to_file): with open(way_to_file, 'r', encoding = 'utf-8') as f: text = f.read() return text def search(text): text1 = re.sub('<.*?>', '', text, flags = re.DOTALL) text2 = re.sub('\n', '', text1, flags = re.DOTALL) m = re.findall('Часовой поясUTC.?[0-9]', text2) return m def write(arr, way_to_file2): with open(way_to_file2, 'a', encoding = 'utf-8') as f: for elem in arr: newtext = f.write(elem) return newtext def main(): fname1 = input() fname2 = input() t = open_text(fname1) txt = search(t) res = write(txt, fname2) return res a = main() import os import re def no_numbers(): num = '(1|2|3|4|5|6|7|8|9|0)' file = '\.' a = [] for elem in os.listdir('.'): res = re.search(num, elem) if res == None: result = re.search(file, elem) if result: a.append(elem) n = len(a) return n def no_repet(): arr = [] for elem in os.listdir('.'): a = re.sub('\..*', '', elem) if a not in arr: arr.append(a) return arr print('Количество файлов без цифр в названии равно', no_numbers()) print('Найдены следующие файлы и папки (без повторов):', no_repet()) a=[] word=input() while word: a.append(word) word=input() for el in a: if len(el)>5: print(el) else: continue import re def open_text(way_to_file): with open(way_to_file, 'r', encoding = 'utf-8') as f: text = f.read() return text def replace1(txt): m = re.sub(r'\bвикинг(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'\bбурундук\1', txt, flags = re.DOTALL) return m def replace2(txt): n = re.sub(r'\bВикинг(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'\bБурундук\1', txt, flags = re.DOTALL) return n def write(txt, way_to_file2): with open(way_to_file2, 'w', encoding = 'utf-8') as f: newtext = f.write(txt) return newtext def main(): fname1 = input() fname2 = input() txt = open_text(fname1) r = replace1(txt) res = replace2(r) result = write(res, fname2) return result a = main() import os import re def dir_choose_kyr(dir_name): arr = [] regex ='[А-Я|Ё|а-я|ё| ]*' for root, dirs, files in 
os.walk(dir_name): for elem in dirs: r = re.sub(regex, '', elem) if r == '': arr.append(elem) print(arr) n = len(arr) return n def main(): dir_name = '.' n = dir_choose_kyr(dir_name) print('В папке найдено ', n, ' папок с полностью кириллическими названиями (допускаются пробелы между словами)') return n main() way = input('Введите путь к файлу без дополнительных символов: ', ) f = open(way, 'r', encoding = 'utf-8') text = f.read() f.close() min = len(text) max = 0 arr = text.split('\n') for el in arr: if len(el) > max: max = len(el) if len(el) < min: min = len(el) k = max/min print('Самая короткая строка короче самой длинной в ', k, ' раз(а)') def open_text(way_to_file): with open(way_to_file, 'r', encoding = 'utf-8') as f: text = f.read() text = text.lower() arr = text.split() for index, elem in enumerate(arr): arr[index] = elem.strip(',.;:!?\n ') return arr def finding_suffix(suffix, way_to_file): arr = open_text(way_to_file) array = [] for elem in arr: a = len(elem) - len(suffix) b = len(elem) if elem[a:b] == suffix: array.append(elem) return array def one_word_once(array): arr = [] for elem in array: if elem not in arr: arr.append(elem) return arr def func(array): temp = [] arr = [] for elem in array: if elem not in temp: temp.append(elem) else: arr.append(elem) return arr def count_freq(array): result = array for i in range(len(array)): temp = func(result) if len(temp)==0: break else: result = temp return result fname = input() suffix = 'ness' arr = finding_suffix(suffix, fname) array = one_word_once(arr) print('В тексте имеются следующие слова с суффиксом ', suffix, ':') for elem in array: print(elem) max_freq = count_freq(arr) print('Макс. 
частоту имеет(-ют) слово(-а):', max_freq) import os,re def counting_sentences(file): sentences = re.findall('', file) return len(sentences) def opening_folders(folder): path = folder dic = {} for file in os.listdir(folder): with open ((os.path.join(folder, file))) as f: text = f.read() number = int(counting_sentences(text)) dic[file] = number return dic def writing_table(dic): with open ('number_of-sentences.txt', 'w', encoding = 'utf-8') as f: for file in dic: f.writelines(file + '\t' + str(dic[file]) + '\n') def author_and_topic(folder): path = folder for file in os.listdir(folder): with open ((os.path.join(folder, file))) as f: text = f.read() reg1 = '(content="(.*)" name="author")' reg2 = '(content="(.*)" name="topic")' for i in range (1): for i in re.findall(reg1, text): author = i[1] for i in re.findall(reg2, text): topic = i[1] writing_table(opening_folders(r'C:\Users/student/Desktop/news/')) author_and_topic(r'C:\Users/student/Desktop/news/') import os number = 0 for roots, dirs, files in os.walk('.'): names = [] for f in files: name = f[::-1].split('.')[0] if name not in names: names.append(name) else: number += 1 break print(number) import re m1 = 'загруж(у|(енн?(ы(й|е|х|ми?)?|о(го|му?|е|й)?|ую|а)?))(с(я|ь))?' m2 = 'загруз(и(т|шь|м|т(ь|е)?|л(а|о|и)?)?(в(ш(и(й|ми?|х)|е(го|му?|е|й)))?)?|ят)(с(я|ь))?' 
# --- Script: print every distinct word form matched by m1 / m2 -------------
# m1 and m2 are the pattern strings assigned immediately before this point.
with open(r"C:\Users\Анна\Documents\GitHub\prog\PythonHW9\re.txt", 'r', encoding='utf-8') as f:
    # Strip surrounding punctuation first, then lower-case each token.
    mas = [word.strip(',.;"()-!?').lower() for line in f for word in line.split()]

arr = []
for token in mas:
    # re.fullmatch replaces "re.search + compare match length with word
    # length": search stops at the first alternative that succeeds (e.g. the
    # bare "т"), so a longer valid form such as "...ть" was never reported.
    if (re.fullmatch(m1, token) or re.fullmatch(m2, token)) and token not in arr:
        arr.append(token)
        print(token)


# --- Script: multiplication table for one number ---------------------------
print('Введите любое число')
a = int(input())
for i in range(1, 11):
    print(i, '*', a, '=', (i * a), ';')
print('Цикл завершен')


# --- Script: count leading lines of a file ---------------------------------
def opening(name):
    r"""Return how many lines of *name* precede the first line equal to ' \n'.

    If no such line exists, every line of the file is counted.
    """
    count = 0
    with open(name, 'r', encoding='utf-8') as f:
        for line in f:
            if line == ' \n':
                break
            count += 1
    return count


def writing():
    """Write the count returned by ``opening(name)`` to ``2.txt``.

    Reads the module-level variable ``name`` assigned below.
    """
    count = opening(name)
    with open('2.txt', 'w', encoding='utf-8') as f:
        f.write(str(count))


name = r'C:\Users\student\Desktop\1.xml'
writing()


# --- Script: occurrence counts written to 3.txt ----------------------------
import re

with open(name, 'r', encoding='utf-8') as f:
    content = f.read()
# NOTE(review): this pattern has a single lazy group, so findall() yields only
# empty strings and i[1] below raises IndexError. Presumably the markup that
# surrounded the group was lost; restore the original pattern before running.
arr = re.findall(r'(.*?)', content)
d = {i[1]: content.count(i[1]) for i in arr}
with open('3.txt', 'w', encoding='utf-8') as f:
    for key in d:
        f.write(str(key) + ' ' + str(d[key]) + '\n')


# --- Script: words with and without the "omni" prefix ----------------------
def reading():
    """Return the lower-cased, punctuation-stripped tokens of omni.txt."""
    with open(r"C:\Users\Анна\Documents\ФиКЛ\PythonHW7\omni.txt", 'r', encoding='utf8') as text:
        return [word.strip(',.;"()-!?').lower() for line in text for word in line.split()]


def omni_counting():
    """Print two counts for the text returned by ``reading()``.

    First the number of tokens starting with 'omni'; then, summed over each
    distinct such token, how often the token with its first four characters
    removed occurs in the text.
    """
    words = reading()  # read the file once instead of once per distinct word
    with_prefix = [word for word in words if word[:4] == 'omni']
    print(len(with_prefix), 'words with OMNI-')
    # dict.fromkeys keeps one entry per distinct word, in first-seen order.
    p = sum(words.count(word[4:]) for word in dict.fromkeys(with_prefix))
    print(p, 'words without OMNI-')


omni_counting()


# --- Script: files whose names contain punctuation -------------------------
import os
import re


def folder_opening(big_folder):
    """Return the number of files in *big_folder* whose name matches the pattern.

    The pattern is applied to the reversed file name: a dot-terminated run of
    non-dot characters (the reversed extension) followed somewhere by one of
    the characters in the class.
    """
    # Same pattern text as before, written as one raw literal instead of two
    # implicitly concatenated ones. Inside the class, "!-(" is a range.
    pattern = re.compile(r'[^.]*\..*?[,._?<>""!-()].*?')
    return sum(
        1
        for item in os.listdir(big_folder)
        # Join with big_folder: a bare name is only resolvable when the
        # folder happens to be the current working directory.
        if os.path.isfile(os.path.join(big_folder, item))
        and pattern.search(item[::-1])
    )
print('Найдено', folder_opening('.'), 'файлов, название которых содержит знаки препинания')


def all_files(big_folder):
    """Return the distinct file stems and sub-folder names of *big_folder*.

    File names are cut at their last dot (a name without a dot is kept
    whole). Stems come first, then folder names, each in ``os.listdir``
    order, with duplicates removed.
    """
    stems = []
    folders = []
    for item in os.listdir(big_folder):
        # Join with big_folder: a bare name is only resolvable when the
        # folder happens to be the current working directory.
        path = os.path.join(big_folder, item)
        if os.path.isfile(path):
            stems.append(item.rsplit('.', 1)[0])
        elif os.path.isdir(path):
            folders.append(item)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(stems + folders))


print('Все файлы:', all_files('.'))


# --- Script: share of words that start with a capital letter ---------------
with open(r'C:\Users\Анна\Documents\GitHub\prog\PythonHW5\text.txt', 'r', encoding='utf8') as f:
    text = f.read()
words = text.split()
total = len(words)
# Count words whose first character is uppercase, not every uppercase letter.
# split() never yields empty strings, so item[0] is safe.
upletters = sum(1 for item in words if item[0].isupper())
print("Количество слов в тексте: ", total)
print('Количество слов с заглавной буквы', upletters)
# Scale to a real percentage and avoid dividing by zero on an empty file.
percent = upletters / total * 100 if total else 0.0
print("Процент слов в тексте, начинающихся с заглавной буквы: ", percent, '%')


# --- Script: seven rows of X characters ------------------------------------
# 'X' * n is already the empty string for n <= 0.
arr = ['X' * int(input('Vvedite chisclo')) for _ in range(7)]
for row in arr:
    print(row)


# --- Script: average word length of long sentences -------------------------
def opening(file):
    """Return the text of *file* split at every '.', '!' and '?'."""
    with open(file, 'r', encoding='utf-8') as f:
        return re.split(r'[.!?]', f.read())


for sentence in opening(r"C:\Users\Анна\Documents\GitHub\prog\PythonHW12\text.txt"):
    words = sentence.split()
    # Only sentences with more than 10 words are analysed.
    if len(words) > 10:
        new_words = [word.strip('.,!?/-;:""«»—()') for word in words]
    else:
        new_words = []
    print(new_words)
    if new_words:
        length = sum(len(word) for word in new_words)
        template = 'Это предложение со словами длины {:.1f}'
        print(template.format(length / len(new_words)))


# --- Script: two checks on three integers ----------------------------------
print('Введите три числа A,B и C')
a = int(input())
b = int(input())
c = int(input())
print("A =", a)
print("B =", b)
print("C =", c)
print('A+B =', (a + b))
print('A*C + B =', (a * c + b))
sum_matches = (a + b) == c
is_root = (a * c) + b == 0  # C solves ax+b=0
# All four messages now name the equation that is actually tested (ax+b=0).
if sum_matches and is_root:
    print('сумма чисел A и B равна С и число С является решением квадратного уравнения ax+b=0')
elif sum_matches:
    print('сумма чисел A и B равна С, но число С не является решением квадратного уравнения ax+b=0')
elif is_root:
    print('сумма чисел A и B не равна С, но число С является решением квадратного уравнения ax+b=0')
else:
    print('сумма чисел A и B не равна С и число С не является решением квадратного уравнения ax+b=0')


# --- Script: print every suffix of a word ----------------------------------
word = input('Vvedite slovo')
for offset in range(len(word)):
    print(word[offset:])


# --- Script: three-attempt quiz from words.csv -----------------------------
words = {}
with open(r"C:\Users\Анна\Documents\GitHub\prog\PythonHW8\words.csv", 'r', encoding='utf-8') as text:
    for line in text:
        for entry in line.split(','):
            parts = entry.split(';')
            # Skip fragments without a ';' (blank lines, trailing commas)
            # instead of failing with IndexError.
            if len(parts) > 1:
                words[parts[0].strip()] = parts[1].strip()

for key in words:
    print(key, '...')
    p = 3  # attempts left for this entry
    for i in range(3):
        if input() == words[key]:
            print('Ура! Вы угадали слово!')
            break
        p -= 1
        print('Осталось', p, 'попыток')
        if p == 0:
            print('Вы не угадали слово')


# --- Script: random sentence generator -------------------------------------
import random


def _random_word(path):
    """Return one randomly chosen whitespace-separated token from *path*."""
    # The with-block closes the file; the earlier f.close() calls sat after
    # a return statement and were never reached.
    with open(path, 'r', encoding='UTF-8') as f:
        return random.choice(f.read().split())


def nouns():
    """Return a random token from nouns.txt."""
    return _random_word(r'D:\Desktop\Аня\sentence_generator\nouns.txt')


def adjectives():
    """Return a random token from adjectives.txt followed by a random noun."""
    adjective = _random_word(r'D:\Desktop\Аня\sentence_generator\adjectives.txt')
    return adjective + ' ' + nouns()


def verbs():
    """Return a random token from verbs.txt."""
    return _random_word(r'D:\Desktop\Аня\sentence_generator\verbs.txt')


def adverbs():
    """Return a random token from adverbs.txt."""
    return _random_word(r'D:\Desktop\Аня\sentence_generator\adverbs.txt')


def assertion():
    """Return '<adjective noun> <verb>t une <adjective noun> <adverb>'."""
    return f"{adjectives()} {verbs()}t une {adjectives()} {adverbs()}"


def sentence():
    """Return an assertion prefixed with 'La ' and ended with a full stop."""
    return 'La ' + assertion() + '.'
def negation(): return 'La ' + (adjectives()) + ' ' + 'ne' + ' ' + (verbs()) + 't' + ' ' + 'pas ' + 'une ' + (adjectives()) + ' '\ + (adverbs()) + '.' def question(): return 'La ' + (adjectives()) + ' ' + (verbs()) + 't' + '-elle ' + 'une ' + (adjectives()) + ' ' + (adverbs()) + '?' def conditions (): return 'Si ' + (assertion()) + ', ' + (assertion()) + '.' def imperative(): a = str(verbs()) return (a.capitalize() + 's' + ' ' + 'une ' + (adjectives()) + ' ' + (adverbs()) + '!') mas = [(sentence()),(negation()),(question()),(conditions ()),(imperative ())] mass = [] for i in range (len(mas)): for item in mas: randitem = random.choice(mas) if randitem not in mass: mass.append(randitem) print(randitem) def content(name): with open (name, 'r', encoding = 'utf-8') as f: content = f.read() return content name = r"C:\Users\Анна\Documents\GitHub\prog\PythonHW10\Squirrels.html" import re reg = u'(Отряд:

Научная сфера:", a[i+1]) if r1 and r2: r = re.search("(

\ .*

)", \ a[i+2]) break f.close() return r def func2(): if func1(): title = func1().group(2) else: print ('что-то пошло не так') return title f = open("text_wiki.txt", 'w', encoding = "utf-8") f.write(func2()) f.close() f = open("text_wiki.txt", encoding = "utf-8") a = f.readlines() for line in a: print(line) import os alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ. ' def func1(): number = 0 arr1 = [] for i in os.listdir('.'): if os.path.isfile(i): j = 0 check1 = True check2 = 0 for j in range(len(i)): if i[j] not in alphabet: check1 = False if i[j] == '.': check2 += 1 if check1 == True and check2 <= 1: number += 1 arr1.append(i) print('Найдено файлов, название которых состоит \ только из латинских символов: ' + str(number)) return arr1 def func2(arr): arr2 = [] for i in arr: if i[0:i.find('.')] not in arr2: arr2.append(i[0:i.find('.')]) for k in arr2: print (k) return func2(func1()) import re def func1(): f1 = open("Философия -- Википедия.txt", 'r', encoding = "utf-8") change1 = re.sub('Филос(о́|о)фи(я(х|ми?)?|и|е?й|ю)', 'Астрол\\1ги\\2', f1.read()) change2 = re.sub('философи(я(х|ми?)?|и|е?й|ю)', 'астрологи\\1', change1) f1.close() return change2 def func2(): f2 = open("Астрология.txt", 'w', encoding = "utf-8") f2.write(func1()) f2.close() return True func2() import re def func1(): arr = [] i = 0 f = open("Космическая программа Китая.txt", encoding = "utf-8") a = f.readlines() for line in a: arr.append(line) f.close() return arr i = 0 arr1 = [] for i in range(len(func1())): res = re.findall('«[А-Яа-я ]*-[1-9]»', func1()[i]) j = 0 for j in range(len(res)): if res[j] not in arr1: arr1.append(res[j]) print (res[j]) word=input('Введите русское существительное первого склонения') if word.endswith('а') or word.endswith ('я'): print ('Именительный падеж, единственное число') elif word.endswith ('ами') or word.endswith ('ями'): print ('Творительный падеж, множественное число') elif word.endswith('ы') or word.endswith ('и'): print ('Родительный падеж, 
единственное число или именительный или винительный падеж, множественное число') elif word.endswith('е'): print ('Дательный или предложный падеж, единственное число') elif word.endswith('ой') or word.endswith ('ою') or word.endswith('ёй') or word.endswith ('ёю')or word.endswith ('ею') or word.endswith ('ей'): print ('Творительный падеж, единственное число') elif word.endswith('у') or word.endswith('ю'): print ('Винительный падеж, единственное число') elif word.endswith ('ам') or word.endswith ('ям'): print ('Дательный падеж, множественное число') elif word.endswith ('ах') or word.endswith('ях'): print ('Предложный падеж, множественное число') else: print ('Родительный или винительный падеж, множественное число') import re def func3b(string): r = re.match('([А-Яа-я][a-я]*)= 10: print (word, freqdict(func1())[word]) a=9 a=int(a) s=input('Введите число') s=int(s) if a==s: print('Позравляю, вы угадали') else: if a>s: print('Загаданное число больше') if a.+', line) w_sum += len(arr1) arr2 = re.findall('ana', line) ana_sum += len(arr2) print (str(ana_sum/w_sum)) return func1() def func1(): arr = [] i = 0 f = open("1.txt", encoding = "utf-8") a = f.readlines() for line in a: words = line.split() for i in range(len(words)): words[i] = words[i].lower() words[i] = words[i].strip('.,!?/\|()";:') arr.append(words[i]) f.close() return arr def func2(x,arr): glasnye = 'аяоёуюэеыи' slova = [] i = 0 for i in range(len(arr)): j = 0 slogi = 0 for j in range(len(arr[i])): if arr[i][j] in glasnye: slogi += 1 if slogi == x: slova.append(arr[i]) return slova def func3(bukva,arr): slova = [] for word in arr: if word[0] == bukva: slova.append(word) return slova y = input('Введите букву русского алфавита ') print(func3(y,func1())) def func1(): arr = [] i = 0 f = open("1.txt", 'r', encoding = "utf-8") a = f.readlines() for line in a: words = line.split() for i in range(len(words)): words[i] = words[i].lower() arr.append(words[i].strip(',.()«»!')) f.close() arr.sort() return arr def 
freqdict(arr): word_count = {} for word in arr: if word not in word_count: word_count[word] = 1 else: word_count[word] += 1 return word_count f1 = open("2.tsv", 'w', encoding = "utf-8") for j in sorted(freqdict((func1()))): f1.write(j) f1.write('\t') f1.write(str(freqdict(func1())[j])) f1.write('\n') f1.close() alphabet = 'abcdefghijklmnopqrstuvwxyzабвгдеёжзийклмнопрстуфхцчшщъыьэюя' alphabet = list(alphabet) def freqdict1(arr): letter_count = {} for letter in alphabet: letter_count[letter] = 0 for word in arr: if word.startswith(letter): letter_count[letter] += 1 return letter_count f2 = open("3.tsv", 'w', encoding = "utf-8") for k in sorted(freqdict1(func1())): f2.write(k) f2.write('\t') f2.write(str(freqdict1(func1())[k])) f2.write('\n') f2.close() a=9 a=int(a) s=input('Введите число ') if len(s)==0: print ('Game over') s=int(s) while a!=s: if a>s: print('Загаданное число больше') if a.+", line) if s1: if s1.group(1) not in freqdict: freqdict[s1.group(1)] = 1 else: freqdict[s1.group(1)] += 1 return freqdict def func2(): f3 = open("Ключи.txt", 'w', encoding = "utf-8") for i in func1(): f3.write(i) f3.write('\n') f3.close() return True func2() def func3(): f4 = open("Прилагательные.txt", 'w', encoding = "utf-8") for i in func1(): s2 = re.search("l.f...", i) if s2: f4.write(i) f4.write(' ') f4.write(str(func1()[i])) f4.write('\n') f4.close() return True func3() def func4(): f5 = open("Внутри тега body.txt", 'r', encoding = "utf-8") change1 = re.sub("(.+)", "\\1 \\2 \\3", f5.read()) change2 = re.sub("<.*>", ' ', change1) f5.close() return change2 import os import re def preprocessing(): all_meta = [] w = open('results.txt', 'w', encoding = 'utf-8') for el in os.listdir('news'): with open(os.path.join('news',el), 'r', encoding = 'Windows-1251') as f: article = f.read() sentences = re.findall(r'', article) template = '{} {}\n' w.write(template.format(el, len(sentences))) author = re.findall(r'', article) authorstr = author[0] authorstr = re.sub('', '', authorstr) topic 
= re.findall(r'', article) topicstr = topic[0] topicstr = re.sub('', '', topicstr) meta = el+','+authorstr+','+topicstr+'\n' all_meta.append(meta) words = [] wordsraw = re.findall('.+', article) for el in wordsraw: wordsrawstr = el wordsrawstr = re.sub('', '', wordsrawstr) wordsrawstr = re.sub('', '', wordsrawstr) wordsrawstr = re.sub('`', '', wordsrawstr) wordsrawstr = wordsrawstr.lower() words.append(wordsrawstr) bigrams = [] for ind in range(1, len(words) - 1): bigrams.append(' '.join([words[ind - 1], words[ind]])) w.close() return bigrams, all_meta def data(all_meta): w = open('metadata.csv', 'w', encoding = 'utf-8') w.write('Название файла,Автор,Тематика текста\n') for el in all_meta: w.write(el) w.close() def bigram_processing(bigrams): w = open('bigrams_res.txt', 'w', encoding = 'utf-8') for el in bigrams: if re.match(r'(в|на|о|об|обо|при|по) .+(е|и|ах|ях)', el) != None: bigram = el + '\n' w.write(bigram) w.close() bigrams, all_meta = preprocessing() bigram_processing(bigrams) data(all_meta) w=input('Введите слово: ') while w == '': w=input('Попробуйте еще раз: ') border = 1 for i in range (len(w) // 2): print (w[border:len(w) - border]) border += 1 import re def print_forms(): with open('rudin.txt', 'r', encoding='utf-8') as f: text = f.read() wordarr = text.split() sit_arr = [] for word in wordarr: word.lower() word.strip(',...!?-–— :,') t = re.match('си(жу|д(е(ть|в((ши)?й?)?|л(а|о|и)?)|и(те?|м|шь)?|я(т|щий)?))', word) if (t != None) and (word not in sit_arr): sit_arr.append(word) for el in sit_arr: print (el) print_forms() import random def create_dict(): with open('db.txt', 'r', encoding = 'utf-8') as f: db = f.read() phrases = db.split('\n') clues = dict() keys = [] phrase_split = [] for phrase in phrases: phrase_split = phrase.split() clues[phrase_split[len(phrase_split) - 1]] = phrase_split[0:len(phrase_split) - 1] keys.append(phrase_split[len(phrase_split) - 1]) return clues, keys def show(clues, keys, shown): key = random.choice(keys) while key in 
shown: key = random.choice(keys) clue_arr = clues[key] for el in clue_arr: print(el, end = ' ') guess = input() if guess.lower() == key: check = True else: check = False return check, key def result(check): congrats = ['Поздравляю!', 'horoshego dnya!', 'Угадали!', 'Верно!', 'Хорошо сработано!'] condolences = ['Попробуйте еще раз!', 'Не отчаивайтесь, продолжайте!', 'Почти в точку... у вас есть еще попытка!', 'nichego, zavtra otgadaete!', 'escho chut-chut...'] if check == True: print(random.choice(congrats)) else: print(random.choice(condolences)) def run(): shown = [] for i in range (10): clues, keys = create_dict() check, key = show(clues, keys, shown) result(check) if check == True: shown.append(key) run() print('Всего доброго!') import re def change(): with open('aves.txt', 'r', encoding = 'utf-8') as f: text = f.read() text = re.sub(r'\bптице.\b', r'рыбо.', text) text = re.sub(r'\bПтице.\b', r'Рыбо.', text) text = re.sub(r'\bптиц', r'\bрыб', text) text = re.sub(r'\bПтиц', r'\bРыб', text) with open('fish.txt', 'w', encoding = 'utf-8') as f: f.write(text) print('Текст записан в файл fish.txt') change() import re def search(): with open('chuvash.html', 'r', encoding = 'utf-8') as f: source = f.read() search_arr = source.split('', el) codearr = re.split('', el[1]) result = codearr[0] return result def record(result): with open('blank.txt', 'w', encoding = 'utf-8') as f: f.write(result) f.close() print('Трехбуквенный код языка записан в файл blank.txt') result = search() record(result) print('Введите число') n=float(input()) while n<2: print ('Отсутствует степени 2, не превышающие', n, 'Пожалуйста, введите число не меньше двух.') n=float(input()) d=2 print ('Степени числа 2, не превышающие', n, ':', end=' ') while d<=n: print(d, end=' ') d*=2 def opentext (title): with open(title, 'r', encoding='utf-8') as f: text = f.read() arr = [] arr = text.split() for elem in arr: elem.lower() elem.strip('!-./?"", ') return arr def firstletter(letter, arr): wordsarr = [] for 
elem in arr: if letter == elem[0:2]: wordsarr.append(elem) return wordsarr def questions(): file_name = input('Введите путь к файлу: ') minlen = int(input('Введите минимальную длину слова: ')) arr = opentext(file_name) un_words = firstletter('un', arr) return minlen, un_words def count(minlen, un_words): wordslen = [] n = 0 for elem in un_words: for letter in elem: n += 1 if n > minlen: wordslen.append(elem) n = 0 print('Количество слов, начинающихся с un:', len(un_words)) print('Процент слов длинее', minlen, ':', len(wordslen)/len(un_words)*100) minlen, un_words = questions() count(minlen, un_words) import os import shutil import re def countfolders(): obj = os.listdir() folders = [el for el in obj if os.path.isdir(el)] result = [] for folder in folders: if r'[a-z]|[A-Z]' and r'[а-яё]|[А-ЯЁ]' in folder: result.append(folder) print(folder) print('Всего папок, удовлетворяющих условию:', len(result)) countfolders() import os def mostfiles(): number = {root : len(files) for root, dirs, files in os.walk('.')} c = 0 folder = '' for root in number: if number[root] > c: c = number[root] folder = root print('Количество файлов в папке по адресу', folder, ':', c) mostfiles() text = open('exomars.txt','r',encoding='utf-8') arr = [] countline = 0 countall = 0 symb = 0 for line in text: countall += 1 arr = line.split( ) for el in arr: if el == '—': symb += 1 countwords = len(arr) - symb if countwords > 5: countline += 1 text.close() print('Всего строк:', countall,'Строк с числом слов больше 5:', countline, 'Процент:', round(countline*100/countall), '%') arr = [] arr1 = [] i = 0 print('Пожалуйста, введите 8 слов') while i != 8: word = input() arr.append(word) i += 1 i = 0 while i <= 6: pair = arr[i] + arr[i+1] arr1.append(pair) i += 2 for el in arr1: print (el) import random def adj(): adj_arr = [] contadj = open('esenin_adj_pl.txt', 'r', encoding='utf-8') for line in contadj: line_lc = line.capitalize().strip() adj_arr.append(line_lc) contadj.close() return 
random.choice(adj_arr) def noun(): noun = [] contnoun = open('spi_noun_pl.txt', 'r', encoding='utf-8') for line in contnoun: line_lc = line.lower().strip() noun.append(line_lc) contnoun.close() return random.choice(noun) def verb(): verbs = [] contverbs = open('majakovsky_verbs.txt', 'r', encoding='utf-8') for line in contverbs: line_lc = line.capitalize().strip() verbs.append(line_lc) contverbs.close() return random.choice(verbs) def adv(): adv = [] contadverb = open('pushkin_adverbs.txt', 'r', encoding='utf-8') for line in contadverb: line_lc = line.lower().strip() adv.append(line_lc) contadverb.close() return random.choice(adv) def prop(): prop = [] contprop = open('properties.txt', 'r', encoding='utf-8') for line in contprop: line_lc = line.lower().strip() prop.append(line_lc) contprop.close() return random.choice(prop) def line1(): syll = 0 while syll != 5: syll = 0 first = adj() + ' ' + noun() for letter in first: if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя': syll += 1 return first def line2(): syll = 0 while syll != 7: syll = 0 second = verb() + ' ' + adv() + ' ' + adv() + random.choice(['!','?','.','...']) for letter in second: if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя': syll += 1 return second def line4(): syll = 0 while syll != 7: syll = 0 fourth = verb() + ' ' + adv() + random.choice(['!','?','.','...']) for letter in fourth: if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя': syll += 1 return fourth def line5(): syll = 0 person=['Я','Ты'] while syll != 7: syll = 0 fifth = random.choice(person) + ' ' + prop() + ' ' + adv() + random.choice(['!','?','.','...']) for letter in fifth: if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя': syll += 1 return fifth print(line1()) print(line2()) print(line1()) print(line4()) print(line5()) print('Введите три числа') a,b,c=float(input()), float(input()), float(input()) div=a/b deg=a**b if div==c: print ('Результат деления А на B равен С') else: print ('Результат деления А на B НЕ равен С') if deg==c: print ('А в степени B равно С') else: print ('А в степени B НЕ 
равно С') import re def preproc(): with open('text.txt', 'r', encoding = 'utf-8') as f: text = f.read() allsent = re.split(r'[\.\?\!]', text) allsent = [sent.lower() for sent in allsent] allsent = [re.sub(r'[,—“\':”\(\)]', '', sent) for sent in allsent] return allsent def count(sent): num = {word : sent.count(word) for word in sent} several = {word : num[word] for word in num if num[word]>1} if several == {}: several = {'Повторяющихся слов' : '0'} return several def display(several): print('Следующее предложение: ') template = '{:^10} {:^10}' for keyword in several: print(template.format(keyword, several[keyword])) allsent = preproc() for sentence in allsent: arr = re.split(r' ', sentence) several = count(arr) display(several) import re def opencount(): with open('corp.xml', 'r', encoding = 'utf-8') as f: text = f.readlines() c = 0 for line in text: line.strip('\s') if '' not in line: c += 1 else: break numheader = str(c) + '\n' return numheader, text def create_dict(text): newdict = {} typearr = [] allmorphs = [] for line in text: if '') allmorphs.append(morph1[0]) keys = [] for el in allmorphs: if el not in keys: keys.append(el) for key in keys: num = allmorphs.count(key) newdict[key] = num return newdict def writenum(c, newdict, neutrum, csvarr): with open('result.txt', 'w', encoding = 'utf-8') as f: f.write(c) for key, freq in newdict.items(): string = str(key) + ':' + str(freq) + '\n' f.write(string) line = '' for el in neutrum: line += el + ', ' line += '\n' f.write(line) for el in csvarr: f.write(el) print('Записано.') def search_pro_n(text): neutrum = [] for line in text: q = re.search('type="f.h', line) if q != None: form1 = line.split('">') form2 = form1[1].split('') arr1 = arr[1].split('') arrlines = arr1[0].split('\n') for line in arrlines: if '', ', ', line) line = re.sub('', '\n', line) csvarr.append(line) return csvarr c, text = opencount() newdict = create_dict(text) neutrum = search_pro_n(text) csvarr = wholecorpora() writenum(c, newdict, neutrum, 
csvarr) file = open ("цитаты1.txt", "r", encoding = "utf-8") stroki = 0 for line in file: arr = line.split('—') ar = arr[0].split() if len(arr) > 0: if len(ar) < 10: print (arr[0]) a = float(input ('Введите a:')) b = float(input ('Введите b:')) c = float(input ('Введите c:')) U1 = U4 = False if a * b == c: U1 = True print ('Выполняется условие 1') if a * c + b == 0: U4 = True print ('Выполняется условие 4') if U1 and U4: print ('Выполняются условия 1 и 4') else: if U1 == False and U4 == False: { print ('НЕ выполняется ни одно из условий 1 или 4') } print ('Для завешения нажмите ENTER') ENTER = input('') import re def get_text(fn): a = [] with open(fn, 'r', encoding = "utf-8") as f: for line in f: a.append(line) return a def main(): text = get_text('Санкт-Петербург — Википедия.html') reg = '
]*?>(UTC[+-]?\d{1,2}:?\d{0,2})' for ti in text: m = re.search(reg, ti) if m != None: print(m.group(1)) return m.group(1) def record(): r = main() f = open("result.txt","w") f.write("Часовой пояс - " + r) f.close() record() def open_text(): with open('Austen Jane. Pride and Prejudice.txt', "r", encoding = "utf-8") as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip('.,!?-;:“"”''') return arr def isness(word): Ret = 0 if len(word) > 4: if word[-4:] == 'ness': Ret = 1 else: Ret = 0 return Ret def AddInList(word, List, Qn): Yes = 0 for i in range(len(List)): if (List[i] == word): Qn[i] +=1 Yes = 1 if (Yes == 0): List.append(word) Qn.append(1) Inarr = open_text() List = list() Qn = list() for i in range (len(Inarr)): if isness(Inarr[i]) == 1: AddInList(Inarr[i], List, Qn) print('Количество разных сущ. с суффиксом -ness равно: ' + str(len(List))) Max = 0 Ind = 0 for i in range(len(List)): if Qn[i] > Max: Ind = i Max = Qn[i] print('Максимальную частотность имеет слово: ' + List[Ind] + ', с частотностью: ' + str(Qn[Ind])) import os import re def papka(): folder = [f for f in os.listdir('.')if not re.search(r'[0-9]+',f)if os.path.isfile(f)] print(len(folder)) return folder papka() def dop(): arr = [] astr = 0 exist = 0 folder = [f for f in os.listdir('.')] for p in range(len(folder)): for j in range(len(folder[p])): if folder[p][j] == '.': astr = folder[p][0:j] exist = 0 for k in range(len(arr)): if arr[k] == astr: exist = 1 if exist == 0: arr.append(astr) return arr print(dop()) word = input ("Введите слово на кириллице:") i = 0 while i < len(word): if word[i] == 'п' or word[i] == 'о' or word[i] == 'е': print (word[i]) i = i+2 print ("Для завершения работы нажмите ENTER") ENTER = input ('') import re def open_text(): with open('Викинги — Википедия.html', "r", encoding = "utf-8") as f: text = f.read() return text def replacement(): result1 = re.sub('викинг((и|у|е|а(х|м(и)?)?)|о(в|м)?)?[^\w]', 'бурундук\\1', 
open_text()) result2 = re.sub('Викинг((и|у|е|а(х|м(и)?)?)|о(в|м)?)?[^\w]', 'Бурундук\\1', result1) return result2 def record(): r = replacement() f = open("result.txt","w", encoding = "utf-8") f.write(r) f.close() return f record() import re import os import csv def first(): reg = '' for i in os.listdir('.'): if i.endswith('.xhtml'): m = [] with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t: text = t.read() for t in re.findall(reg, text): m.append(t) with open('new_text.txt', 'a', encoding = 'utf-8') as f: f.write(i+'\t'+str(len(m)) + '\n') first() def second(): for i in os.listdir('.'): reg = '' with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t: text = t.read() for t in re.findall(reg, text): if re.search('', text): with open('table.csv', 'a', encoding = 'utf-8') as f: f.write(i+','+re.search('', text)) second() import random def read_words(filename): file = open(filename, "r", encoding = "utf-8") arr = [] for line in file: arr += line.strip().split(', ') file.close() return arr def verb(number): if number == 's': return random.choice(read_words("singular_verbs.txt")) else: return random.choice(read_words("plural_verbs.txt")) def noun(number): if number == 's': return random.choice(read_words("singular_nouns.txt")) else: return random.choice(read_words("plural_nouns.txt")) def clinoun(): return random.choice(read_words("clitic_noun.txt")) def adverb(): return random.choice(read_words("adverb.txt")) def punctuation(): return random.choice(read_words("punctuation.txt")) def verse1(): return clinoun() + ' ' + noun('s') + ' ' + adverb() + ' ' + verb('s') + punctuation() def verse2(): return noun('pl') + ' ' + verb('pl') + ' ' + adverb() + ' ' + clinoun() + punctuation() def verse3(): return noun('s') + ' ' + adverb() + ' ' + clinoun() + ' ' + verb('s') + punctuation() def make_verse(): verse = random.choice([1,2,3]) if verse == 1: return verse1() elif verse == 2: return verse2() else: return verse3() for n in range(4): print(make_verse()) def 
open_text(): with open('green.txt', "r", encoding = "utf-8") as f: text = f.read() arr = text.split('.') return arr def deli(): txt = open_text() for i, w in enumerate(txt): for s in '.,!?-;:“"”''()«»–': txt[i] = txt[i].replace(s, "") return txt def des(): txt = deli() dlina = [x for x in txt if len(x.split()) > 10] return dlina def big(): txt = des() f = [] for i in txt: f += [x for x in i.split() if x[0].isupper()] return f print (big()) file = open("text.txt", "r", encoding = "utf-8") lmin = lmax = len(file.readline()) for line in file: lp = len(line) if lp > 0: if lmin > lp: lmin = lp if lmax < lp: lmax = lp print (lmax / lmin) def open_text_1(): with open('islandcorp.xml', "r", encoding = "utf-8") as f: Line = 0 for i in f: if i != '\n': Line += 1 else: break return Line def record(): with open("result1.txt","w", encoding = "utf-8") as f: f.write(str(open_text_1())) return import re def keys(): with open('islandcorp.xml', "r", encoding = "utf-8") as f: text = f.read() Dic = {} reg = '.*?' 
res = re.findall(reg, text) for i in range(len(res)): if res[i] not in Dic: Dic[res[i]] = 1 else: Dic[res[i]] += 1 return Dic def record1(): with open("result2.txt","w", encoding = "utf-8") as f: a = keys() for key in a: f.write(key + ',' + str(a[key])+ '\n') record() record1() import os import re def main(): Sum = 0 for root, dirs, files in os.walk('.'): for d in dirs: cir = 0 for i in range(len(d)): a = re.search(r'[а-яёЁ А-Я]+',d[i]) if a == None: cir = 1 if cir == 0: Sum += 1 return Sum print(main()) arr =[] word = input("Введите слово: ") while word: arr.append(word) word = input ("Введите слово: ") w = 0 for w in range (len(arr)): if len(arr[w]) > 5: print (arr[w]) print ("Для завершения работы нажмите ENTER") ENTER = input ('') import re def open_text(): with open('txtfind.txt', "r", encoding = "utf-8") as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip('.,!?-;:“"”''') return arr def find_in_text(): List = list() regex = '\W?(на(((й((д(у(т(ся)?)?|ёшь(ся)?|ёт(ся|е(сь)?)?|ём(ся)?|и|ите(сь)?|я|енный|ены))|ти(сь)?)))|(ш(ёл(ся)?|л(а|и|о)(сь)?|едш(и|(ий|ая|ее)(ся)?)))))\W?' 
words = open_text() for i in range (len(words)): m = re.search(regex, words[i]) if m != None: List.append(words[i]) return List uList = list() List = find_in_text() for i in range(len(List)): Include = 0 for j in range(len(uList)): if uList[j] == List[i]: Include = 1 if Include == 0: print(List[i]) uList.append(List[i]) word = ('abracadabra') i=0 while i <= len(word): print (word[0:i]) i = i+1 import random def get_words(fn): words = {} with open(fn, 'r') as fd: for line in fd: word, collocations = line.split(',', 1) words[word] = collocations.replace(word, '.'*len(word)).split(',') return words def ask_riddle(words_dict): words = list(words_dict.keys()) rnd_word = random.choice(words) rnd_collocation = random.choice(list(words_dict[rnd_word])) print(rnd_collocation) word = input('Пропущенное слово:') return rnd_word, word == rnd_word def main(): words = get_words('f3.csv') word, answer = ask_riddle(words) print('И это правильный ответ!' if answer else 'Вы ошиблись, правильный ответ: '+ word) return word, answer main() import re def main(): s = '' f = open("Викинги.html","r",encoding="utf-8") for line in f: line = re.sub("в(и|и́)кинг(а(ми?|х)?|о(в|м)|у|е|и)?[^\w]","бурундук\\2",line) line = re.sub("В(и|и́)кинг(а(ми?|х)?|о(в|м)|у|е|и)?[^\w]","Бурундук\\2",line) s = s + line f.close() return s def record(): s = main() f = open("results.txt","w",encoding='utf-8') f.write(s) f.close() record() a=int(input('input a number1: ')) b=int(input('input a number2: ')) c=int(input('input a number3: ')) print('\na=',a,'\nb=',b,'\nc=',c) if a*b==c: print('\nПроизведение чисел a и b равно числу c') else: print('\nПроизведение чисел a и b не равно c') if a*c+b==0: print('Число c является решением линейного уравнения a*x+b=0') else: print('Число c не является решением линейного уравнения a*x+b=0') print('\nЧтобы завершить программу, нажмите Enter') ENTER=input('') import random def phrase(): f0 = open("plus1.txt","r",encoding="utf-8") pr1 = f0.read().split() p1 = random.choice(pr1) 
f1 = open("plus2.txt","r",encoding="utf-8") pr2 = f1.read().split() p2 = random.choice(pr2) return p1 + ' ' + p2 def adjective(): f2 = open("adj.txt","r",encoding="utf-8") adj = f2.read().split() return random.choice(adj) def verb(): f3 = open("verb.txt","r",encoding="utf-8") v = f3.read().split() return random.choice(v) def noun(num): f4 = open("sg.txt","r",encoding="utf-8") nounsg = f4.read().split() f5 = open("pl.txt","r",encoding="utf-8") nounpl = f5.read().split() f6 = open("ind.txt","r",encoding="utf-8") nounind = f6.read().split() if num == 'pl': return random.choice(nounpl) if num == 'ind': return random.choice(nounind) return random.choice(nounsg) def punctuation(): puncts = [".", "?", "!", "...",";"] return random.choice(puncts) def verse1(): return phrase() + ' ' + noun("sg") + ' ' + noun("pl") + punctuation() def verse2(): return verb() + ', ' + verb() + ' ' + noun("ind") + punctuation() def verse3(): return noun("sg") + ' ' + adjective() + ' ' + noun("pl") + punctuation() def doit(): verse = random.choice([1,2,3]) if verse == 1: return verse1() elif verse == 2: return verse2() else: return verse3() for n in range(4): print(doit()) import os import re def texts(name): f = open(name, 'r') text = f.read() x = re.findall('.+', text) f.close() return x def resutls(s,fname): f = open(fname,"w",encoding = "utf-8") f.write(s) f.close() def words(): s = "" for roots, dirs, files in os.walk('.'): for file in files: if file.endswith('.xhtml'): s = s + file + "\t"+ str(len(texts(os.path.join(roots,file)))) + "\n" results(s,"result1.txt") if __name__ == '__words__': words() import os def main(): num = 0 for root, dirs, files in os.walk('.'): for d in dirs: k = 0 for i in d: if i not in "йцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪЭЖДЛОРПАВЫФЯЧСМИТЬБЮ": k += 1 if k == 0: num += 1 return num if __name__ == '__main__': print(main()) import random def words(): f = open("1.csv","r",encoding="utf-8") a = f.read().split(',') m = [] for n in a: b = 
n.rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ m.append(b) return m def d(): m = words() d = {} for i in m: a = i.split() d[a[0]] = a[1] return d def rand(): m = words() di = d() mas = [] for n in di.keys(): mas.append(n) v = random.choice(mas) return v def attempt(): di = d() v = rand() j = 0 for i in di[v]: j += 1 print(v,'.'*j) s = input() if s == di[v]: result = "you win" else: result = "you lose" return result print(attempt()) s=input("введите слово: ") i=0 for letter in s: if (i+1)%2!=0 : if s[i]=='о' or s[i]=='п' or s[i]=='е': print(s[i]) i=i+1 print('\nЧтобы завершить программу, нажмите Enter') ENTER=input('') import re def text(): a=[] f = open("Санкт-Петербург.html","r",encoding="utf-8") for line in f: a.append(line) return a def main(): a=text() s='' p1 = int; p2 = int regex = '"[A-Z][A-Z][A-Z](\+|-)?[0-9][0-9]?:?[0-90-9]?"' for line in a: b=line.split() for i in b: res = re.search(regex,i) if res != None: p1 = i.find('>') p2 = i.find('<') s=i[p1+1:p2] return s def record(): s=main() f = open("result.txt","w") f.write("Часовой пояс - "+s) f.close() record() def names(): import os m = os.listdir('.') return m def main(): m = names() newm = [] num = 0 for i in m: k = 0 for j in i: if j in '1234567890': k += 1 if k == 0: num += 1 if '.' 
in i: i = i[:i.index('.')] if i not in newm: newm.append(i) print('num = {}'.format(num)) print(newm) if __name__ == '__main__': main() def text(): f = open("ness.txt","r",encoding="utf-8") a = f.read().split() m = [] for n in a: b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ m.append(b) return m def ness(m): mas = [] s = "" for i in m: if i[-4:] == 'ness': if i not in s: mas.append(i) s = s + i + " " return mas def numb(): m = text() mas = ness(m) return len(mas) def main(): m = text() b = ness(m) mas2 = [] fr = "" s = "" for i in m: if i[-4:] == 'ness': s = s + i + " " for n in b: mas2.append(s.count(n)) maxi = mas2[0] for j in mas2: if j > maxi: maxi = j for n in b: if s.count(n) == maxi: fr=fr+" "+n return fr print("Количество разных слов на -ness =",numb(),"\nСамое(ые) частотное(ые) -",main()) import re def lines(): f = open('vim4.txt','r',encoding='utf-8') a = f.read() c = re.split(r'[.?!]',a) lines = [' '.join([word.strip('.,<>/?""-=_+''""[]{}()*&^%$ return lines def main(): sents = lines() results = [] for line in sents: k = '' k = ['+' for w in line.split()] if len(k) > 10: for w in line.split(): if w.istitle() == True: results.append(w) return results if __name__ == '__main__': print(main()) import re def text(): f = open("portrait.txt","r",encoding="utf-8") a = f.read().split() m = [] for n in a: b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ m.append(b) return m def main(): m = text() regex = 'на(й|ш(е|ё)?)(т|д|л)(ш|енн?)?(а?я?|(и|о|ы|(е|ё)|ую?)?(т|шь)?(ся)?(м(у|и)?|го|е|й|х)?)?' 
s = '' for i in m: res = re.search(regex,i) if res != None: k = 0 for j in i: if j not in regex: k += 1 if k == 0: if i not in s: s = s + i + ' ' return s print(main()) import re def opp(): k = 0 f = open("it.xml","r",encoding="utf-8") for line in f: k += 1 f.close() return k def record1(): f = open('result1.txt','w',encoding='utf-8') f.write(str(opp())) f.close() record1() def dic(): d = {} regex1 = 'lemma="' regex2 = 'type="[a-zþ0-9]+"' f = open("it.xml","r",encoding="utf-8") for line in f: if re.search(regex1,line) != None: res = re.search(regex2,line) if res != None: p1 = line.rfind('"') p2 = line.find('type=') s = line[p2+6:p1] if s in d.keys(): d[s] += 1 else: d[s] = 1 return d def record2(): d = dic() f = open('result1.txt','a',encoding='utf-8') for i in d.keys(): f.write('\n'+i) f.close() record2() def plur(): d = {} regex1 = 'lemma="' regex2 = 'type="[a-zþ0-9]+"' f = open("it.xml","r",encoding="utf-8") for line in f: if re.search(regex1,line) != None: res = re.search(regex2,line) if res != None: p1 = line.rfind('"') p2 = line.find('type=') s = line[p2+6:p1] if s[0] == 'l' and s[2] == 'f': if s in d.keys(): d[s] += 1 else: d[s] = 1 return d def record3(): d = plur() f = open('result2','w',encoding='utf-8') for i in d.keys(): f.write(i+' - '+str(d[i])+'\n') f.close() record3() f=open("new1.txt","r",encoding = "utf-8") mx=mn=len(f.readline()) for line in f: if line != "\n": if len(line) > mx: mx = len(line) if len(line) < mn: mn = len(line) print(mx/mn) f.close() f=open("text1.txt","r",encoding = "utf-8") for line in f: sym=line.split(" ") if sym[2]=="союз": print(line) f.close() f=open("text1.txt","r",encoding = "utf-8") s = input("Введите слово: ") m = [] while s!='': m.append(s) s=input("Введите слово: ") for i in m: for line in f: sym = line.split(" ") if i == sym[0]: print(i,sym[1:]) else: print(i+" - в словаре нет такого слова") break f.close() f=open("text1.txt","r",encoding = "utf-8") s=0 for line in f: sym=line.split(" ") if sym[4]=="ед" and 
def dictionary():
    """Tally ``type="..."`` attribute values of a file and save the tally.

    Prompts for an input path, counts every ``type="<word>">`` occurrence
    in that file, then prompts for an output path and writes one
    ``value<TAB>count`` line per distinct value.
    """
    file2 = input('Введите путь к файлу: ')
    with open(file2, "r", encoding="utf-8") as f:
        text = f.read()

    dictn = {}
    for attr in re.findall(r'type="\w+">', text):
        # Drop the leading 'type="' and the trailing '">'.
        value = attr[6:-2]
        dictn[value] = dictn.get(value, 0) + 1

    file3 = input('Введите путь к файлу, куда будет записана информация из словаря: ')
    # The output file must be opened for writing; it used to be opened in
    # "r" mode, and str(key, count) raised TypeError before anything was
    # written.
    with open(file3, "w", encoding="utf-8") as f:
        for key, count in dictn.items():
            f.write('{}\t{}\n'.format(key, count))
def adj():
    """Return one randomly chosen whitespace-separated word from ``adj.txt``."""
    with open('adj.txt', 'r', encoding='utf-8') as source:
        candidates = source.read().split()
    return random.choice(candidates)
def change1(wikiarticle):
    """Return *wikiarticle* with every lower-case 'викинг' replaced by 'бурундук'."""
    pattern = re.compile('викинг')
    return pattern.sub('бурундук', wikiarticle)
def delete_doubles(d):
    """Return a copy of *d* without entries whose value was already seen.

    For every distinct value only the first key (in the dict's iteration
    order) is kept. The input dict is not modified.

    Args:
        d: mapping to filter.

    Returns:
        A new dict with the same key/value pairs minus the repeats.
    """
    # A list is kept (not a set) so that unhashable values still work,
    # as they did with the original membership test.
    arr = []
    new = {}
    for key, value in d.items():
        if value in arr:
            continue
        # The previous version stored the key as its own value and then
        # returned an undefined name.
        new[key] = value
        arr.append(value)
    return new
def make_bigrams(arr):
    """Return the space-joined pairs of adjacent elements of *arr*.

    Args:
        arr: sequence of strings.

    Returns:
        ``['a b', 'b c']`` for ``['a', 'b', 'c']``; an empty list when
        *arr* has fewer than two elements.
    """
    # Pair each element with its successor. The previous loop ranged over
    # the global ``words`` instead of ``arr`` and indexed ``arr[i + 1]``
    # past the end on the final element.
    return [first + ' ' + second for first, second in zip(arr, arr[1:])]
def count_tf(word, text):
    """Return the relative frequency of *word* in *text*.

    Args:
        word: item to count.
        text: sequence supporting ``count`` and ``len``.

    Returns:
        ``text.count(word) / len(text)``, or 0 when *text* is empty.
    """
    # An empty text used to raise ZeroDivisionError.
    if not text:
        return 0
    return text.count(word) / len(text)
def folders():
    """Return, as a string, how many directories in '.' have a digit in their name."""
    digit = re.compile('[0-9]')
    matching = [
        entry for entry in os.listdir('.')
        if os.path.isdir(entry) and digit.search(entry)
    ]
    return str(len(matching))
def noun_m1():
    """Return a random word from entry 1 of the module-level ``lines`` list.

    ``lines`` is the result of ``readlines()`` on ``allwords.txt``, so each
    entry still ends with its newline.
    """
    # split() with no argument drops the trailing newline (and any empty
    # strings from repeated spaces). split(' ') left '\n' attached to the
    # last word, which then leaked into the generated verse.
    return random.choice(lines[1].split())
def verb_1():
    """Return a random word from entry 20 of the module-level ``lines`` list.

    ``lines`` is the result of ``readlines()`` on ``allwords.txt``, so each
    entry still ends with its newline.
    """
    # split() with no argument drops the trailing newline (and any empty
    # strings from repeated spaces). split(' ') left '\n' attached to the
    # last word, which then leaked into the generated verse.
    return random.choice(lines[20].split())
def adv_2():
    """Return a random word from entry 49 of the module-level ``lines`` list.

    ``lines`` is the result of ``readlines()`` on ``allwords.txt``, so each
    entry still ends with its newline.
    """
    # split() with no argument drops the trailing newline (and any empty
    # strings from repeated spaces). split(' ') left '\n' attached to the
    # last word, which then leaked into the generated verse.
    return random.choice(lines[49].split())
def numeral_mid5():
    """Return a random word from entry 67 of the module-level ``lines`` list.

    This function was previously defined under the name ``numeral_mid2``.
    Because a ``numeral_mid2`` reading entry 58 is already defined earlier
    in the script, the duplicate name silently replaced it and every
    ``numeral_mid2()`` call drew from entry 67 instead. The local variable
    it used was already called ``numeral_mid5``, and the definition sits
    between ``numeral_f5`` and ``numeral_f6``.
    """
    # split() with no argument also drops the trailing newline that
    # readlines() leaves on the entry; split(' ') kept it on the last word.
    return random.choice(lines[67].split())
def haiku():
    """Build three candidate three-line poems and return one of them at random.

    All three candidates are generated (in the same order as before) and
    only then is one selected.
    """
    first = '\n'.join((row_2_5(), row_2_7(), row_1_5()))
    second = '\n'.join((row_3_5(), row_2_7(), row_3_5()))
    third = '\n'.join((row_1_5(), row_1_7(), row_3_5()))
    return random.choice((first, second, third))
def dictionary(a, limit=5):
    """Map the cells of row 0 of *a* to the cells of row 1, column by column.

    Args:
        a: sequence of rows (as produced by ``csv.reader``); rows 0 and 1
            are used.
        limit: maximum number of leading columns to pair. Defaults to 5,
            the count that used to be hard-coded; pass ``None`` to pair
            every column.

    Returns:
        dict whose keys come from row 0 and values from row 1.
    """
    keys, values = a[0], a[1]
    # zip stops at the shorter row, so rows with fewer than `limit` cells
    # no longer raise IndexError. Slicing with None keeps every pair.
    return dict(list(zip(keys, values))[:limit])
def read_html():
    """Return the full text of ``ptitsi.html``.

    This function used to be called ``open``. That name shadowed the
    builtin for the whole script, so the ``open(...)`` call inside it (and
    the one in ``write``) invoked this zero-argument function and failed.
    """
    with open('ptitsi.html', 'r', encoding='utf-8') as f:
        return f.read()


def substitute(content):
    """Strip markup from *content* and replace forms of 'птица' with 'рыба'.

    Steps, in order: remove everything between ``<`` and ``>``; delete
    every run of two or more newlines/spaces; rewrite the lower-case and
    capitalised stems, keeping the matched ending.
    """
    content = re.sub('<.*?>', '', content, flags=re.DOTALL)
    content = re.sub(r'(\n| ){2,}', '', content, flags=re.DOTALL)
    content = re.sub('птиц(а(ми?|х)|ы|е(й|ю)?|у)?', 'рыб\\1', content)
    content = re.sub('Птиц(а(ми?|х)|ы|е(й|ю)?|у)?', 'Рыб\\1', content)
    return content


def write(content):
    """Write *content* to ``text.txt`` (UTF-8), replacing any existing file."""
    with open('text.txt', 'w', encoding='utf-8') as f:
        f.write(content)


if __name__ == '__main__':
    # The original call was missing its final closing parenthesis.
    print(write(substitute(read_html())))
def percent(words, number):
    """Report what share of the 'un'-prefixed words is longer than *number*.

    Prints the count of words starting with 'un' when there is at least
    one, and returns the rounded percentage of those words whose length
    exceeds *number* as a string ending in '%'. Returns a fixed message
    when no word starts with 'un'.
    """
    prefixed = [item for item in words if item[0:2] == 'un']
    if not prefixed:
        return 'no matching words were found'
    longer = sum(1 for item in prefixed if len(item) > number)
    print('the number of words:', len(prefixed))
    return str(round(longer / len(prefixed) * 100)) + '%'
def search():
    """Print the directories in the current folder named with both alphabets.

    A directory qualifies when its name contains at least one Latin letter
    and at least one Cyrillic letter. The count is printed first, followed
    by ``:`` and the comma-separated names when any were found.
    """
    # [a-zA-Z], not [a-zA-z]: the range 'A-z' also covers the six
    # punctuation characters between 'Z' and 'a' ([ \ ] ^ _ `), so a name
    # such as "_тест" was counted as containing a Latin letter.
    latin = re.compile('[a-zA-Z]')
    cyrillic = re.compile('[а-яА-ЯЁё]')
    a = [
        f for f in os.listdir()
        if os.path.isdir(f) and latin.search(f) and cyrillic.search(f)
    ]
    count = len(a)
    if count == 1:
        print('1 dir was found', end = '')
    else:
        print (count, 'dirs were found ', end ='')
    if a:
        print( ':'+', '.join(a))
def list_files(path):
    """Return the joined path of every file that ``os.walk(path)`` yields."""
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path)
        for name in names
    ]
inf(f): text = open_file(f) inf = {} for line in text: author = re.search('content="(.*?)" name="author"', line) if author != None: author1 = author.group(1) for line in text: topic = re.search('content="(.*?)" name="topic"', line) if topic != None: topic1 = topic.group(1) inf[author1] = topic1 return inf def create_csv(path): files = list_files(path) with open('task2.csv', 'w', encoding = 'utf-8') as k: for f in files: infa = inf(f) f_name = re.search('(_.*?.xhtml)', f).group(1) for key in infa.keys(): k.write(str(f_name) + '\t' + str(key) + '\t' + str(infa[key]) + '\n') def pr_loc(f): text = open_file(f) bigrams = [] for i in range(len(text)): pr = re.search('gr="PR"', text[i]) if pr != None: prep = re.search('(.*?)', text[i]).group(1) loc = re.search('"S.*?loc', text[i+1]) if loc != None: S_loc = re.search('(.*?)', text[i+1]).group(1) bigrams.append(prep + ' ' + S_loc) return bigrams def text_without_tegs(f): text = open_file(f) text_w_t = '' for line in text: if re.search('', line) != None: word = re.search('(.*?)', line).group(1) prep = re.search('(.)()?', line) if prep != None: if prep.group(1) == '.' or prep.group(1) == '!' 
or prep.group(1) == '?': text_w_t = text_w_t + ' ' + word + prep.group(1)+'\n' else: text_w_t = text_w_t + ' ' + word + prep.group(1) else: text_w_t = text_w_t + ' ' + word return text_w_t def bigr(path): files = list_files(path) with open('task3.txt', 'w', encoding = 'utf-8') as k: for f in files: for b in pr_loc(f): k.write(b + '\n') path = 'C:\\Users\\1\\Documents\\ниу вшэ\\КИЛИ и программирование\\python\\экзамен\\news' file_format_sent(path) create_csv(path) bigr(path) import random def adjective_Abl_m(): with open('adjective_Abl_verse1_m.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def adjective_Abl_f(): with open('adjective_Abl_verse1_f.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def noun_Abl_m(): with open('noun_Abl_verse1_m.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def noun_Abl_f(): with open('noun_Abl_verse1_f.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def noun_phrase(): with open('prepositions.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() prep = random.choice(line) while prep != 'в' and prep != 'к' and prep != 'с': prep = random.choice(line) if prep == 'в' or prep == 'к': with open('noun_verse1_prep1.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) else: with open('noun_verse1_prep2.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) return prep.title() + ' ' + noun1 def noun_Gen(): with open('noun_Gen_verse1.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def 
verse11(): return adjective_Abl_m().title() + ' ' + noun_Abl_m() def verse12(): return adjective_Abl_f().title() + ' ' + noun_Abl_f() def verse13(): return noun_phrase() + ' ' + noun_Gen() def participle_adj(): with open('participle_adjective_verse2.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def subject(): with open('subject_verse2.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def place(): with open('places_verse2.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split(', ') return random.choice(line) def obj_f(): with open('adjective_obj_verse2_f.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() adj = random.choice(line) with open('object_verse2_f.txt', 'r', encoding = 'utf-8') as k: objects = k.readlines() for obj in objects: obj = obj.split() obj = random.choice(obj) return adj + ' ' + obj def obj_m(): with open('object_verse2_m.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() obj1 = random.choice(line) with open('object_Gen_verse2_m.txt', 'r', encoding = 'utf-8') as k: objects = k.readlines() for obj in objects: obj = obj.split() obj2 = random.choice(obj) with open('adjective_obj_verse2_m.txt', 'r', encoding = 'utf-8') as l: adjectives = l.readlines() for adjective in adjectives: adjective = adjective.split() adj = random.choice(adjective) return adj + ' ' + obj2 + ' ' + obj1 def verse21(): return participle_adj().title() + ' ' + subject() + ' ' + place() + '.' 
# Stems that take the instrumental ending "-ем" rather than "-ом" in verse31.
_SOFT_INSTRUMENTAL_STEMS = ('мор', 'солнц')

# Prepositions that verse31 prefixes with "как " in the finished line.
_COMPARATIVE_PREPOSITIONS = ('с', 'в', 'к')

# Prepositions that verse41 never uses.
_VERSE41_EXCLUDED_PREPOSITIONS = ('в', 'к', 'с')

# Preposition -> file holding nouns in the matching form for verse41.
# Any preposition not listed here falls back to 'noun_verse41_6.txt'.
_VERSE41_NOUN_FILES = {
    'под': 'noun_verse41_2.txt',
    'над': 'noun_verse41_2.txt',
    'у': 'noun_verse41_3.txt',
    'от': 'noun_verse41_3.txt',
    'из': 'noun_verse41_3.txt',
    'при': 'noun_verse41_4.txt',
    'на': 'noun_verse41_5.txt',
}

# Subjects for which verse41 draws its verb from 'verb_verse41_1.txt';
# every other subject uses 'verb_verse41_2.txt'.
_VERSE41_FIRST_VERB_LIST_NOUNS = (
    'дрожь', 'ночь', 'сталь', 'тень', 'кровь', 'плеть',
)


def _read_words(filename):
    """Return every whitespace-separated word stored in *filename*.

    Blank lines contribute nothing, so a trailing empty line in a word
    file cannot produce an empty candidate list on its own.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return f.read().split()


def _pick_word(filename, exclude=()):
    """Return a random word from *filename* that is not in *exclude*.

    Raises:
        IndexError: if the file holds no eligible word.
    """
    candidates = [w for w in _read_words(filename) if w not in exclude]
    return random.choice(candidates)


def verse22():
    """Build a second-verse line: capitalised verb plus a feminine object."""
    verb = _pick_word('verb_verse2.txt')
    return verb.title() + ' ' + obj_f()


def verse23():
    """Build a second-verse line: capitalised verb plus a masculine object."""
    verb = _pick_word('verb_verse2.txt')
    return verb.title() + ' ' + obj_m()


def verb_feel():
    """Return a random verb from 'verb_feelings.txt'."""
    return _pick_word('verb_feelings.txt')


def verse31():
    """Build a third-verse line: '<Verb>, <preposition> <declined noun>'.

    A preposition and a noun stem are drawn at random; the ending appended
    to the stem depends on the preposition, and 'с', 'в', 'к' are rendered
    as 'как с', 'как в', 'как к'.
    """
    prep = _pick_word('prepositions.txt')
    base_noun = _pick_word('base_noun_verse3.txt')
    is_mor = base_noun == 'мор'

    if prep in ('под', 'над', 'с'):
        ending = 'ем' if base_noun in _SOFT_INSTRUMENTAL_STEMS else 'ом'
    elif prep in ('у', 'от', 'из'):
        ending = 'я' if is_mor else 'а'
    elif prep in ('при', 'на', 'в'):
        ending = 'е'
    else:
        # 'к' and every preposition not handled above share these endings.
        ending = 'ю' if is_mor else 'у'

    if prep in _COMPARATIVE_PREPOSITIONS:
        prep = 'как ' + prep

    return '{}, {} {}'.format(verb_feel().title(), prep, base_noun + ending)


def verse32():
    """Build a third-verse line: capitalised participle, subject, full stop."""
    participle = _pick_word('participle_verse3.txt')
    subject = _pick_word('subject_verse3.txt')
    return participle.title() + ' ' + subject + '.'


def verse41():
    """Build a fourth-verse line: '<Noun> <preposition> <noun> <verb>.'.

    The preposition is never 'в', 'к' or 'с'; the second noun's file is
    selected by the preposition and the verb's file by the first noun.
    Excluded prepositions are filtered out before the draw, so a
    prepositions file containing only excluded words raises IndexError
    instead of looping forever.
    """
    noun1 = _pick_word('noun_verse41_1.txt')
    prep = _pick_word('prepositions.txt',
                      exclude=_VERSE41_EXCLUDED_PREPOSITIONS)
    noun2 = _pick_word(_VERSE41_NOUN_FILES.get(prep, 'noun_verse41_6.txt'))

    if noun1 in _VERSE41_FIRST_VERB_LIST_NOUNS:
        verb_file = 'verb_verse41_1.txt'
    else:
        verb_file = 'verb_verse41_2.txt'
    verb1 = _pick_word(verb_file)

    return noun1.title() + ' ' + prep + ' ' + noun2 + ' ' + verb1 + '.'
def noun42(): with open('object_verse42.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def the_end_of_the_line(): with open('prepositions.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() line.append('во' and 'со' and 'ко') line.remove('под') line.remove('у') line.remove('от') line.remove('по') line.remove('из') prep = random.choice(line) if prep == 'во': with open('noun_verse42_1.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) elif prep == 'со': noun = 'мной' elif prep == 'ко': with open('noun_verse42_2.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) elif prep == 'при' or prep == 'на': if noun42() == ('плач' or 'крик' or 'стон' or 'зов' or 'стан' or 'взгляд' or 'прах' or 'плен' or 'хлад'): with open('noun_verse42_3.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) while noun2 == 'ней': noun2 = random.choice(noun) else: with open('noun_verse42_3.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) while noun2 == 'нем': noun2 = random.choice(noun) elif prep == 'в': with open('noun_verse42_4.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) elif prep == 'с': with open('noun_verse42_5.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) elif prep == 'к': with open('noun_verse42_6.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) else: if noun42() == ('плач' or 'крик' or 'стон' or 'зов' or 'стан' or 'взгляд' or 'прах' or 
'плен' or 'хлад'): with open('noun_verse42_7.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) while noun2 == 'ней': noun2 = random.choice(noun) else: with open('noun_verse42_7.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) while noun2 == 'нем': noun2 = random.choice(noun) return prep.title() + ' ' + noun2 def verse42(): with open('pronoun_verse4.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() pronoun = random.choice(line) return verb_feel().title() + ' ' + noun42() + ' ' + pronoun + '... ' + the_end_of_the_line() def verse51(): with open('pronoun_verse5.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() pronoun = random.choice(line) if pronoun == 'вся' or pronoun == 'та': with open('adjective_verse5_f_4.txt', 'r', encoding = 'utf-8') as k: adjectives = k.readlines() for adjective in adjectives: adjective = adjective.split() adj = random.choice(adjective) with open('noun_verse5_f.txt', 'r', encoding = 'utf-8') as l: nouns = l.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) elif pronoun == 'весь' or pronoun == 'тот': with open('adjective_verse5_m_3.txt', 'r', encoding = 'utf-8') as k: adjectives = k.readlines() for adjective in adjectives: adjective = adjective.split() adj = random.choice(adjective) with open('noun_verse5_m.txt', 'r', encoding = 'utf-8') as l: nouns = l.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) else: with open('adjective_verse5_f_3.txt', 'r', encoding = 'utf-8') as k: adjectives = k.readlines() for adjective in adjectives: adjective = adjective.split() adj = random.choice(adjective) with open('noun_verse5_f.txt', 'r', encoding = 'utf-8') as l: nouns = l.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) 
return pronoun.title() + ' ' + adj + ' ' + noun1 + '.' def verse52(): with open('parenthesis_verse5.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() parenthesis = random.choice(line) with open('noun_verse52.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) if noun1 == 'звезда' or noun1 == 'вуаль' or noun1 == 'туман': with open('verb_verse52_sg.txt', 'r', encoding = 'utf-8') as l: verbs = l.readlines() for verb in verbs: verb = verb.split() verb1 = random.choice(verb) else: with open('verb_verse52_pl.txt', 'r', encoding = 'utf-8') as l: verbs = l.readlines() for verb in verbs: verb = verb.split() verb1 = random.choice(verb) return parenthesis.title() + ' ' + noun1 + ' ' + verb1 + '?!' def poem(): variant = random.choice([1, 2, 3, 4, 5, 6]) if variant == 1: var = random.choice([1, 2]) if var == 1: return verse11() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52() else: return verse12() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52() elif variant == 2: var = random.choice([1, 2]) if var == 1: return verse13() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() else: return verse13() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() elif variant == 3: var = random.choice([1, 2, 3, 4]) if var == 1: return verse11() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() elif var == 2: return verse12() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() elif var == 3: return verse11() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() else: return verse12() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() elif variant ==4: return verse13() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52() elif variant == 5: var = 
random.choice([1, 2]) if var == 1: return verse13() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() else: return verse13() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() else: var = random.choice([1, 2, 3, 4]) if var == 1: return verse11() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() elif var == 2: return verse12() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() elif var == 3: return verse11() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() else: return verse12() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() print (poem()) import re def open_file(): with open('Птицы.html', 'r', encoding = 'utf-8') as f: text = f.read() return text def sub_word(): word1 = '\\bпти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b' word2 = '\\bПти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b' s = re.sub(word1, 'рыб\\1', open_file()) m = re.sub(word2, 'Рыб\\1', s) return m def add_file(): with open('Замена.html', 'w', encoding = 'utf-8') as k: k.write(sub_word()) return k add_file() def data (year, month, day): if month > 12: return False else: if day >= 31: return False else: if day == 31 and (month == 2 or month == 4 or month == 9 or month == 11 or month == 6): return False else: if day == 30 and month == 2: return False else: if day == 29 and month == 2 and (year % 4 != 0 or (year % 100 == 0 and year % 1000 != 0)): return False elif day == 16 and month == 12 and year == 1998: print ("Вы угадали день рождения разработчика! 
Не забудьте его поздравить :)") else: return True year = input ("Введите год (натуральное число): ") month = input ("Введите месяц (натуральное число до 12 включительно): ") day = input ("Введите день (натуральное число до 31 включительно): ") while year and month and day: if data (int(year), int(month), int(day)) == True: print ("Такая дата есть в календаре:)") elif data (int(year), int(month), int(day)) == False: print ("Простите, но такой даты нету:(") else: print (data (int(year), int(month), int(day))) print ("Попробуем снова:)") year = input ("Введите год (натуральное число): ") month = input ("Введите месяц (натуральное число до 12 включительно): ") day = input ("Введите день (натуральное число до 31 включительно): ") print ("Все!:)") a = int (input ()) b = int (input ()) c = int (input ()) s = (a + 1) // 2 + (b + 1) // 2 + (c + 1) // 2 print (s) print (os.path.abspath('.')) print (os.getcwd()) os.path.join('texts', '1.txt') os.path.exists('texts') print (os.listdir('.')) s = 'hello' i = 1 texts = [f for f in os.listdir('.') if f.endswith('.txt')] print (texts) for f in os.listdir('.'): if f.endswith('.txt'): with open(f, 'a', encoding = 'utf-8') as w: w.write (s*i) i += 1 os.mkdir('corpus1') os.makedirs('a\\b\\long\\long') os.rename('texts\\1.txt', 'texts\\2.txt') os.path.isfile(r'texts\corpus1.txt') os.path.isdir(r'texts') shutil.copy(r'texts\2.txt', r'new_corpus\2.txt') shutil.move('откуда', 'куда') shutil.copytree('папка', 'папка2') os.remove(r'new_corpus\2.txt') shutil.rmtree('corpus') def align_right(arr): for i in arr: print ('{:>40}'.format(i)) arr = ['abba', 'assa', 'adda', 'affa'] align_right(arr) def tokenize(text): tokens = text.split() tokens1 = [t.strip('.,?!":;*()-— ') for t in tokens] tokens2 = [t.lower() for t in tokens1] return tokens2 text = 'Инициатива публикации лучших дисциплин исходила в том числе от Студсовета. 
Чуть ранее представители Студенческого совета получили возможность использовать результаты СОП при обсуждении возникающих проблем и спорных моментов. Теперь все студенты смогут использовать опубликованную информацию — агрегированное мнение своих предшественников — при формировании собственной индивидуальной образовательной траектории.' print(tokenize(text)) def tabulate(a): for i in range(0, len(a)): print('{:<10}'.format(a[i][0]) + '{:^10}'.format(a[i][1]) + '{:>10}'.format(a[i][2])) a = [('кошки','собаки','коровы'), ('мяу','гав','му'), (3,3,2)] tabulate(a) x = int (input ()) if x > 0: sign = 1 elif x < 0: sign = -1 else: sign = 0 print (sign) a = int (input ()) b = int (input ()) if a < b: print (a) else: print (b) x = int (input ('введите целое число x = ')) print ('вы ввели число', x) res = x*55/100+33 print ('результат вычислений x * 55 / 100 + 33 =', res) a = int (input ('введите длину первого катета a = ')) b = int (input ('введите длину второго катета b = ')) S = a * b / 2 print (S) import re def func1(regw, word1): word = input('Введите слово: ') m = re.search(regw, word) if m != None: return 'Данное слово является формой слова ' + word1 else: return 'Данное слово не является формой слова ' + word1 word1 = 'свобода' regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b' def if_any(s, regw): m = re.search(regw, s) s = s.split() p = [] for i in range(len(s)): m = re.search(regw, s[i]) if m != None: p = p.append(s[i]) else: continue return 'Слово встречается в тексте ' + len(p) + ' раз' s = 'Свободу попугаям!' print(if_any(s, regw)) import re import os import shutil import re def make_folders_sent(s): sent = s.split() b = '\\'.join(sent) os.makedirs(b) s = input('Пожалуйста, введите предложение (без знаков препинания!) 
\n') make_folders_sent(s) def make_folders_num(n): for i in range(1,n+1): os.mkdir(str(i)) for a in range(i): name = str(i) + '\\' + str(a+1) + '.txt' file = open(name, 'w', encoding = 'utf-8') file.write('Hello!') n = int(input('Пожалуйста, введите натуральное число \n')) make_folders_num(n) def count(): filelist = [f for f in os.listdir('.') if os.path.isfile(f)] exts = [] for f in filelist: ext = f.split('.')[-1] exts.append(ext) c = {e : exts.count(e) for e in exts} keys = c.keys() for key in keys: print('{:^10}'.format(key) + '{:^10}'.format(c[key])) count() name = input ('Введите ваше имя: ') age = input ('Сколько вам лет? ') colour = input ('Какой ваш любимый цвет? ') music = input ('Кто ваш любимый музыкальный исполнитель? ') dream = input ('Какова ваша заветная мечта? ') with open ('information.txt', 'w', encoding = 'utf-8') as f: f.write ('Информация о соседе\n') f.write (name + '\n' + age + '\n' + colour + '\n' + music + '\n' + dream) with open('Austen_Jane_Pride_and_Prejudice.txt', 'r', encoding = 'utf-8') as f: text = f.readlines() list_ = [] for line in text: line = line.split() list_.extend(line) print (list_) import re with open ('freq.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines () for line in lines: if 'союз' in line: print (line) with open ('freq.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines () a = [] for line in lines: line = line.split () if 'жен' in line and 'ед' in line: print (line[0], end = ', ') a.append (line[-1]) ipm_sum = 0 for elem in a: elem = float (elem) ipm_sum += elem print (ipm_sum) with open ('freq.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines () word = input () while word: for line in lines: line = line.split() if word in line: print ('Морфологическая информация: ' + ' '.join (line[2:-2])) print ('IPM = ' + line[-1]) word = input () import random with open ('words.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() random.shuffle(lines) score = 0 for line in lines: line = line.strip () 
word, hint = line.split(' ', 1) response = input ('Какое слово я загадала?\n ' + 'Подсказка: ' + hint + ' ') if response == word: print ('Правильно, молодец!') score += 1 else: print ('А вот и нет, слово было ', word) with open ('scores.txt', 'w', encoding = 'utf-8') as n: percent = score / 5 * 100 n.write('Вот результат\n') n.write(str(percent) + '%') import re def func1(regw, word1): word = input('Введите слово: ') m = re.search(regw, word) if m != None: return 'Данное слово является формой слова ' + word1 else: return 'Данное слово не является формой слова ' + word1 word1 = 'свобода' regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b' def if_any(s, regw): m = re.search(regw, s) s = s.split() p = [] for i in range(len(s)): m = re.search(regw, s[i]) if m != None: p = p.append(s[i]) else: continue return 'Слово встречается в тексте ' + len(p) + ' раз' s = 'Свободу попугаям!' print(if_any(s, regw)) import re def opentext(a): with open(a, 'r', encoding = 'utf-8') as f: text = f.read() return text def delete_tags(): s = re.sub ('<.*?>', '', opentext(name), flags = re.DOTALL) return s def delete_odd(): s = re.sub ('(\\s)+', '\\1', delete_tags()) return s name = input('Введите название файла: ') print (delete_odd()) n = int (input ()) hour = n // 60 if hour >= 24: k = hour // 24 hour = hour - k * 24 minute = n % 60 print (hour, minute) import re def opentext(a): with open (a, 'r', encoding = 'utf-8') as f: content = f.read() return content def find_all_links(): reg = r'(.*?)' links = re.findall(reg, opentext(a)) return links a = input('Введите название файла: ') def pictures(): reg = r'
(.*?)
' pictures = re.findall(reg, opentext(a)) return pictures pictures = pictures() print ('Подписи к картинкам: ') for picture in pictures: print (picture[2]) def opentext(fname): with open(fname, 'r', encoding = 'utf-8') as f: text = f.readlines() for line in text: line = line.split() list_ = [] for i in range (0, len(line)): a = line[i] a = a.lower() a = a.strip('.,?!";:"*()') list_.append(a) return list_ def first_letter(letter): fname = input('введите название файла: ') text = opentext(fname) words_letter = [] for i in range(len(text)): if text[i].startswith(letter) == True: words_letter.append(text[i]) else: continue return words_letter def questions(): letter = input('введите первую букву: ') number = int(input('введите число: ')) words = first_letter(letter) result = [] for i in range(len(words)): if len(words[i]) > number: result.append(words[i]) else: continue return result print (questions()) a = int (input ('введите первое число ')) b = int (input ('введите второе число ')) c = int (input ('введите третье число ')) s = a + b + c print (s) import os def drawtree(): for root, dirs, files in os.walk('C:\\Users\\1\\Documents\\ниу вшэ'): num = root.count('\\') new_root = root.split('\\')[-1] print('\t'*num+'--'+new_root+'\n') for f in files: print((num+1)*'\t'+f) drawtree() name = input () print ('Hello, ' + name + '!') import re def opentext(a): with open(a, 'r', encoding = 'utf-8') as f: text = f.read() return text def delete_tags(): s = re.sub ('<.*?>', '', opentext(name), flags = re.DOTALL) return s def delete_odd(): s = re.sub ('(\\s)+', '\\1', delete_tags()) return s name = input('Введите название файла: ') print (delete_odd()) import re def opentext(a): with open (a, 'r', encoding = 'utf-8') as f: content = f.read() return content def find_all_links(): reg = r'(.*?)' links = re.findall(reg, opentext(a)) return links a = input('Введите название файла: ') def pictures(): reg = r'
(.*?)
' pictures = re.findall(reg, opentext(a)) return pictures pictures = pictures() print ('Подписи к картинкам: ') for picture in pictures: print (picture[2]) num = int (input ()) t = 1 while t * 2 <= num: t = t * 2 print (t) def opentext(fname): with open(fname, 'r', encoding = 'utf-8') as f: text = f.readlines() list_ = [] for line in text: line = line.split() list_.extend(line) words = [] for i in range(len(list_)): a = list_[i] a = a.lower() a = a.strip('.,?!";:"*()') words.append(a) return words def un_forms(): text = opentext(fname) words_un = [] for i in range(len(text)): if text[i].startswith('un') == True: words_un.append(text[i]) else: continue return words_un def quantity(): words = un_forms() return len(words) def percentage(number): words = un_forms() s = 0 for i in range(len(words)): if len(words[i]) > number: s += 1 else: continue result = s / len(words) * 100 return result fname = input('Введите название файла: ') number = int(input('Введите число: ')) print ('Количество слов с приставкой un- равно ', quantity()) print ('Процент слов с приставкой un- длинее ', number, ' равен ', percentage(number)) import os import shutil import re def all_files(): ff = os.listdir('.') file_names = [] for f in ff: if os.path.isfile(f): a = f.split('.') if a[-1].isdigit() or re.search(r'\s', a[-1]) != None: a = '.'.join(a) elif len(a) > 2: a[0] = '.'.join(a[:-1]) name = a[0] file_names.append(name) return file_names def all_dirs(): ff = os.listdir('.') dir_names = [] for f in ff: if os.path.isdir(f): name = f dir_names.append(name) return dir_names def all_without_rep(): names_file = all_files() names_dir = all_dirs() names = names_file + names_dir names_1 = [] for name in names: if name not in names_1: names_1.append(name) return names_1 def out_nice(): names = all_without_rep() print('Список папок и файлов в текущей директории: ') for name in names: print (name) def cyrill_latin_symb_fold(): names = all_dirs() lat = '[a-zA-Z]' cyr = '[а-яА-Я]' cyr_lat_dirs = [name for 
name in names if re.search(lat, name) != None and re.search(cyr, name) != None] return len(cyr_lat_dirs) out_nice() print ('Количество папок, содержащих и латинские, и кириллические символы, равно: ', cyrill_latin_symb_fold()) import random def file(): with open('dictionary.csv', 'r', encoding = 'utf-8') as f: lines = f.readlines() d = {} for line in lines: line = line.split(';') d[line[0]] = line[1].strip('\n') return d def right(): with open('Верные ответы.txt', 'r', encoding = 'utf-8') as f: text = f.read() text = text.split('\n') return random.choice(text) def wrong(): with open('Неверные ответы.txt', 'r', encoding = 'utf-8') as f: text = f.read() text = text.split('\n') return random.choice(text) def zagadka(d): keys = d.keys() keys = list(keys) key = random.choice(keys) print ('Подсказка: ' + key + '...') answer = input('Введите ответ: ') if answer == d[key]: return(right()) else: return(wrong() + ' Верный ответ ' + d[key] + '.') d = file() a = input('Хочешь поиграть? Введи "да" или "нет":)\n') while a == 'да': print(zagadka(d)) a = input('Хочешь сыграть еще раз?:) Введи "да" или "нет"\n') print ('До свидания!') a = int (input ()) b = int (input ()) c = int (input ()) if a / b == c: print (a, "разделить на", b, "равно", c) else: print (a, "разделить на", b, "не равно", c) if a ** b == c: print (a, "в степени", b, "равно", c) else: print (a, "в степени", b, "не равно", c) import os def max_dir(): a = {} for root, dirs, files in os.walk(os.path.abspath('.')): a[root] = len(files) max_v = max(a.values()) if max_v == 1: print('Наибольшее количество файлов (' + str(max_v) +' файл) в директориях: ') elif max_v == 2 or max_v == 3 or max_v == 4: print('Наибольшее количество файлов (' + str(max_v) +' файла) в директориях: ') else: print('Наибольшее количество файлов (' + str(max_v) +' файлов) в директориях: ') for key in a.keys(): if a[key] == max_v: print(key) max_dir() import re def open_file(a): with open(a, 'r', encoding = 'utf-8') as f: text = f.read() return 
text
# NOTE(review): the bare name above is the tail of a statement that starts
# before this block (outside the reviewed range); it is kept verbatim.


# ---------------------------------------------------------------------------
# Script: look up the ISO 639-3 code in a saved article
# ---------------------------------------------------------------------------

def find_ISO():
    """Return the ISO 639-3 code found in the article named by global ``a``.

    Returns the fallback message when the pattern does not match.
    """
    reg = 'ISO 639-3(\\w{3})'
    m = re.search(reg, open_file(a), flags=re.DOTALL)
    if m is None:
        return 'В статье не указано ISO 639-3'
    # The pattern has exactly one group; the original requested group 2,
    # which raises IndexError on every successful match.
    # NOTE(review): part of the pattern may be missing in this copy of the
    # file -- confirm the group number once the pattern is restored.
    return m.group(1)


def add_file():
    """Append ``<article name>: <ISO code>`` to Result.txt.

    Returns the (already closed) file object, as the original did.
    """
    with open('Result.txt', 'a', encoding='utf-8') as k:
        k.write('\n' + a + ': ' + find_ISO())
    return k


a = input('Введите название статьи в формате Название.html: ')
add_file()


# ---------------------------------------------------------------------------
# Script: print the word with one more character cut from both ends each time
# ---------------------------------------------------------------------------

word = input ('введите слово: ')
for i in range(1, len(word)):
    print(word[i:-i])


# ---------------------------------------------------------------------------
# Script: word guessing game driven by dict.csv ("word:hint" per line)
# ---------------------------------------------------------------------------

with open('dict.csv', 'r', encoding='utf-8') as f:
    lines = f.readlines()

a = {}
for line in lines:
    line = line.strip('\n')
    if ':' not in line:
        # Blank or malformed rows used to crash the tuple unpacking below.
        continue
    key, value = line.split(':', 1)
    a[key] = value

for key, hint in a.items():
    b = input('Угадай слово. Вот подсказка: ' + hint)
    t = 0
    # One retry per letter of the answer.
    while b != key and t < len(key):
        b = input('Неправильно, попробуй еще раз: ')
        t += 1
    # The original used ``while ... else`` and therefore congratulated the
    # player even when every attempt was wrong.
    if b == key:
        print('Правильно!')


# ---------------------------------------------------------------------------
# Script: depth of the deepest directory below the current one
# ---------------------------------------------------------------------------

import os


def greatestway():
    """Return the largest number of path separators in any directory path
    that ``os.walk('.')`` yields."""
    # os.walk joins components with os.sep, so counting a hard-coded '/'
    # reports 0 on Windows.
    return max(root.count(os.sep)
               for root, _dirs, _files in os.walk('.', topdown=False))


print(greatestway())


# ---------------------------------------------------------------------------
# Script: sentence counts and author/topic table for the news2 corpus
# ---------------------------------------------------------------------------

import re
import os

# NOTE(review): both patterns are empty in this copy of the file (their
# tag-like content appears to have been stripped). Restore them, each with one
# capturing group, before relying on meta().
_AUTHOR_PATTERN = ''
_TOPIC_PATTERN = ''


def _news_files():
    """Yield ``(file name, decoded text)`` for every file under .\\news2."""
    for root, dirs, files in os.walk('.\\news2'):
        for f in files:
            with open(os.path.join(root, f), 'r',
                      encoding='Windows-1251') as text:
                yield f, text.read()


def _first_group(pattern, text):
    """Return group 1 of ``pattern`` matched at the start of ``text``.

    Returns '' when there is no match or the pattern has no group.
    """
    found = re.match(pattern, text)
    if found is None or not found.re.groups:
        return ''
    return found.group(1) or ''


def openfile():
    """Print, for every news file, its name and the number of '.'-separated
    chunks left after tags are removed."""
    for f, file_text in _news_files():
        file_text = re.sub('<.*?>', '', file_text)
        print(f, ' ', len(file_text.split('.')))
    return


def meta():
    """Write .\\table.csv with one ``file author topic`` row per news file."""
    # The original reopened the table in 'w' mode for every file, so only the
    # last row survived, and rows had no line terminator.
    with open('.\\table.csv', 'w', encoding='utf-8') as csv_f:
        csv_f.write('Файл' + ' ' + 'Автор' + ' ' + 'Тема' + '\n')
        for f, file_text in _news_files():
            writer = _first_group(_AUTHOR_PATTERN, file_text)
            topic = _first_group(_TOPIC_PATTERN, file_text)
            csv_f.write(f + ' ' + writer + ' ' + topic + '\n')
    return


print(openfile())
print(meta())


# ---------------------------------------------------------------------------
# Script: line count and lemma-type frequencies for F.xml
# ---------------------------------------------------------------------------

import re
from collections import Counter

# NOTE(review): the pattern is empty in this copy of the file; dicfreq() reads
# group 2 and raises IndexError until the pattern is restored.
_LEMMA_PATTERN = r''


def openf():
    """Return the lines of F.xml."""
    # 'utf=8' was a misspelling of the codec name.
    with open('F.xml', 'r', encoding='utf-8') as f:
        return f.readlines()


def countli():
    """Write the number of lines of F.xml to result.txt; return that name."""
    results = 'result.txt'
    with open(results, 'w', encoding='utf-8') as n:
        n.write(str(len(openf())))
    return results


def dicfreq():
    """Return ``{type: count}`` for lines mentioning 'lemma' and write the
    keys to keys.txt, one per line."""
    types = []
    for line in openf():
        if 'lemma' not in line:
            continue
        found = re.search(_LEMMA_PATTERN, line)
        if found is not None:
            types.append(found.group(2))
    # Counter keeps first-seen order, like the hand-written tally it replaces.
    freq = dict(Counter(types))
    with open('keys.txt', 'w', encoding='utf-8') as te:
        te.write('\n'.join(freq))
    return freq


print(countli(), dicfreq())


# ---------------------------------------------------------------------------
# Script: print every word longer than seven characters, sentence by sentence
# ---------------------------------------------------------------------------

import re

fname = input('Введите название файла: ')


def openfile(fname):
    """Return the full text of ``fname``."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read()


def sentences():
    """Split the text of global ``fname`` on runs of '.', '!', '?' or newline
    that follow a word character and precede whitespace."""
    return re.split('\\b[.!?\\n]+(?=\\s)', openfile(fname).strip())


def find8():
    """Print each word longer than 7 characters with its length."""
    template = '{} {:->10}'
    for sentence in sentences():
        for w in sentence.split(' '):
            w = w.strip('?!&(),.:;«»\n”“ ')
            if len(w) > 7:
                print(template.format(w, len(w)))
    return


print(find8())


# ---------------------------------------------------------------------------
# Script: inspect file names in the REALEC directory
# ---------------------------------------------------------------------------

import re
import os
import shutil

filename = []
unique = []
name = ''


def _without_extension(entry):
    """Drop every '.<non-digits>' run from a directory entry name."""
    return re.sub(r'\.\D+', '', str(entry))


def numberinf():
    """Return how many REALEC entries still contain a digit once their
    '.<non-digits>' parts are removed."""
    return sum(1 for f in os.listdir('REALEC')
               if re.search(r'\d', _without_extension(f)) is not None)


def foldername():
    """Return the module-level ``unique`` list, extended with the distinct
    non-empty REALEC names (extension removed)."""
    for f in os.listdir('REALEC'):
        filename.append(_without_extension(f))
    for n in filename:
        if n != '' and n not in unique:
            unique.append(n)
    return unique


print(numberinf(), foldername())


# ---------------------------------------------------------------------------
# Script: random three-line poem assembled from words.txt
# ---------------------------------------------------------------------------

import random

with open('words.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()


def _picker(index):
    """Build a zero-argument function that returns a random word from line
    ``index`` of words.txt, ignoring the first space-separated field."""
    def pick():
        return random.choice(lines[index].strip().split(' ')[1:])
    return pick


# One picker per line of words.txt, in file order. These replace 51
# copy-pasted function bodies; names and call signatures are unchanged.
nm1, nf1 = _picker(0), _picker(1)
nm2, nf2 = _picker(2), _picker(3)
nm3, nf3 = _picker(4), _picker(5)
nm4, nf4 = _picker(6), _picker(7)
nm5, nf5 = _picker(8), _picker(9)
nm6, nf6 = _picker(10), _picker(11)
adjm1 = _picker(12)
adjm2, adjf2 = _picker(13), _picker(14)
adjm3, adjf3 = _picker(15), _picker(16)
adjm4, adjf4 = _picker(17), _picker(18)
adjm5, adjf5 = _picker(19), _picker(20)
adjm6, adjf6 = _picker(21), _picker(22)
v1, v2, v3 = _picker(23), _picker(24), _picker(25)
v4, v5, v6 = _picker(26), _picker(27), _picker(28)
partm3 = _picker(29)
partm4, partf4 = _picker(30), _picker(31)
partm5, partf5 = _picker(32), _picker(33)
partm6, partf6 = _picker(34), _picker(35)
conj1, conj2 = _picker(36), _picker(37)
numm2, numf2 = _picker(38), _picker(39)
numm3, numf3 = _picker(40), _picker(41)
numm4, numf4 = _picker(42), _picker(43)
numm5, numf5 = _picker(44), _picker(45)
adv2, adv3, adv4 = _picker(46), _picker(47), _picker(48)
adv5, adv6 = _picker(49), _picker(50)

# Each template is a tuple of pickers whose words are joined with spaces.
# Repeated templates are intentional: they keep the original selection odds.
# NOTE(review): (adjf2, nf2) breaks the pattern of its neighbours (the male
# counterpart is (adjm2, nm3)); possibly meant nf3 -- confirm.
_LINE_5_1 = (
    (adjm4, nm1), (adjm3, nm2), (adjm2, nm3), (adjm4, nm1),
    (adjf4, nf1), (adjf3, nf2), (adjf2, nf2), (adjf4, nf1),
    (partm3, nm2), (partm4, nm1), (partf4, nf1),
    (nm5,), (nf5,),
    (numm2, adjm1, nm2), (numm2, adjm2, nm1), (numm3, adjm1, nm1),
    (numm4, nm1),
    (numf2, adjf2, nf1), (numf2, nf3), (numf3, nf2),
)
_LINE_7_1 = (
    (adjm6, nm1), (adjm5, nm2), (adjm4, nm3), (adjm3, nm4), (adjm2, nm5),
    (adjm1, nm6),
    (adjf6, nf1), (adjf5, nf2), (adjf4, nf3), (adjf3, nf4), (adjf2, nf5),
    (partm6, nm1), (partm5, nm2), (partm4, nm3), (partm3, nm4),
    (partf6, nf1), (partf5, nf2), (partf4, nf3),
)
_LINE_7_2 = ((v6, conj1), (adv2, v4, conj1), (adv3, v3, conj1))
_LINE_5_2 = ((v5,), (adv2, v3), (adv3, v2), (adv4, v1))
_LINE_7_3 = ((adv2, v5), (adv3, v4), (adv4, v3), (adv5, v2), (adv6, v1))


def _compose(templates):
    """Pick one template at random and fill it with freshly chosen words."""
    # Choosing first avoids building every candidate line just to keep one.
    return ' '.join(pick() for pick in random.choice(templates))


def random_line_5_1():
    """Return a random line built from the ``_LINE_5_1`` templates."""
    return _compose(_LINE_5_1)


def random_line_7_1():
    """Return a random line built from the ``_LINE_7_1`` templates."""
    return _compose(_LINE_7_1)


def random_line_7_2():
    """Return a random line built from the ``_LINE_7_2`` templates."""
    return _compose(_LINE_7_2)


def random_line_5_2():
    """Return a random line built from the ``_LINE_5_2`` templates."""
    return _compose(_LINE_5_2)


def random_line_7_3():
    """Return a random line built from the ``_LINE_7_3`` templates."""
    return _compose(_LINE_7_3)


def poem():
    """Return a random three-line poem (without the final full stop)."""
    layouts = (
        (random_line_5_1, '.\n', random_line_7_1, '.\n', random_line_5_1),
        (random_line_5_1, '\n', random_line_7_2, '\n', random_line_5_2),
        (random_line_5_1, '\n', random_line_7_3, '.\n', random_line_5_1),
        (random_line_5_1, '\n', random_line_7_3, ',\n', random_line_5_2),
    )
    layout = random.choice(layouts)
    # Strings are separators; callables produce the lines themselves.
    return ''.join(part if isinstance(part, str) else part()
                   for part in layout)


print(poem()+'.')


# ---------------------------------------------------------------------------
# Script: word count and frequency dictionary
# ---------------------------------------------------------------------------

from collections import Counter

fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased, stripped text of ``fname`` split on single
    spaces."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read().lower().strip().split(' ')


def count_words(fname):
    """Return the number of space-separated tokens in ``fname``."""
    # The original stripped punctuation from each token and discarded the
    # result before counting, so the count is simply the list length.
    return len(openfile(fname))


def dicff(fname):
    """Return ``{token: count}`` for ``fname`` with keys in sorted order."""
    return dict(Counter(sorted(openfile(fname))))


print(count_words(fname), dicff(fname))


# ---------------------------------------------------------------------------
# Script: find forms of the verb "открыть"
# ---------------------------------------------------------------------------

import re
import string

# NOTE(review): the original strip set is cut off after '?!@' in this copy of
# the file; string.punctuation is a stand-in that contains those characters.
# Replace it with the intended set if it is known.
_STRIP_CHARS = string.punctuation

fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased, stripped text of ``fname`` split on single
    spaces."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read().lower().strip().split(' ')


def words(fname):
    """Return the tokens of ``fname`` with surrounding punctuation removed."""
    return [word.strip(_STRIP_CHARS) for word in openfile(fname)]


regex = r'\bоткр(ыл[аи]?|о(ют?|е(шь|т|м|те))|ыть)\b'


def formsearch(regex):
    """Return the tokens of global ``fname`` that match ``regex``, one per
    line."""
    return '\n'.join(w for w in words(fname)
                     if re.search(regex, str(w)) is not None)


print(formsearch(regex))


# ---------------------------------------------------------------------------
# Script: collect "-ing" forms and count one of them
# ---------------------------------------------------------------------------

fname = input('Введите название файла: ')


def openfile(fname):
    """Return the lower-cased, stripped text of ``fname`` split on single
    spaces."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read().lower().strip().split(' ')


def ingform(fname):
    """Return the tokens of ``fname`` that end in 'ing' once surrounding
    punctuation is removed (strip set: see ``_STRIP_CHARS``)."""
    stripped = (word.strip(_STRIP_CHARS) for word in openfile(fname))
    return [word for word in stripped if word.endswith('ing')]


theword = input('Введите слово: ')


def searching(theword):
    """Return how many '-ing' tokens of global ``fname`` equal ``theword``."""
    return ingform(fname).count(theword)


print(ingform(fname))
print(searching(theword))


# ---------------------------------------------------------------------------
# Script: country and capital from a saved HTML page
# ---------------------------------------------------------------------------

import re

fname = input('Введите название файла: ')


def open_html(fname):
    """Return the full text of ``fname``."""
    with open(fname, 'r', encoding='utf-8') as f:
        return f.read()


# NOTE(review): the patterns in find_capital() and find_country() have lost
# their tag-like content in this copy of the file. They are reproduced as
# found; find_capital() reads group 3 of a two-group pattern and raises
# IndexError until the pattern is restored.

def find_capital(fname):
    """Return the capital found in ``fname``, or None when nothing matches."""
    text = open_html(fname)
    if re.search(r'', text) is None:
        return None
    capital = re.search(r'data-wikidata-property-id="P36"(.*?)(.*?)', text)
    if capital is None:
        return None
    return capital.group(3)


def find_country(fname):
    """Return the country found in ``fname``, or None when nothing matches."""
    text = open_html(fname)
    if re.search(r'', text) is None:
        return None
    country = re.search(r'>(.*?)', text)
    if country is None:
        return None
    return country.group(1)


print('Страна: ', find_country(fname), 'Столица: ', find_capital(fname))


# ---------------------------------------------------------------------------
# Script: replace "комар" with "слон" in a saved HTML page
# ---------------------------------------------------------------------------

import re

fname = input('Введите название файла: ')


def open_html(fname):
    """Return the text of ``fname`` with markup removed."""
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    te = re.sub('<.*?(".*?")?.*?>', '', text, flags=re.U)
    # NOTE(review): the next three patterns have lost their content in this
    # copy of the file and currently change nothing; kept as placeholders.
    te2 = re.sub('', '', te, flags=re.U)
    te3 = re.sub('', '', te2, flags=re.U)
    te4 = re.sub('.*?', '', te3, flags=re.U)
    return te4


def changeform(fname):
    """Write the cleaned text of ``fname`` to results.txt with every form of
    'комар' / 'Комар' replaced by 'слон' / 'Слон' plus the same ending."""
    endings = '(у|е|ы|а(х|м|ми)?|о(м|в))?'
    changed = re.sub('комар' + endings, 'слон\\1', open_html(fname),
                     flags=re.U)
    changed = re.sub('Комар' + endings, 'Слон\\1', changed, flags=re.U)
    with open('results.txt', 'w', encoding='utf-8') as n:
        n.write(changed)
    return 'Готово! Результаты в файле results.txt .'


print(changeform(fname))


# ---------------------------------------------------------------------------
# Script: TF-IDF keywords for every text in the "wikipedia" directory
# ---------------------------------------------------------------------------

import os
import re
from math import log

# NOTE(review): the character class is cut off after '%' in this copy of the
# file; it is closed here with '#' only. Add the lost characters if known.
punct = r'[.,!«»?&@"$\[\]\(\):;%#]'
tabs = '[\t\n]'

# The original loop kept six entries (``if i > 5: break``).
_KEYWORD_COUNT = 6


def preprocessing(text):
    """Lower-case ``text``, drop punctuation and return the list of words."""
    text_wo_punct = re.sub(punct, '', text.lower())
    text_wo_punct = re.sub(tabs, ' ', text_wo_punct)
    return text_wo_punct.strip().split()


def count_tf(word, text):
    """Return the share of tokens in ``text`` (a list) equal to ``word``."""
    if not text:
        return 0.0
    return text.count(word) / len(text)


def count_df(word, texts):
    """Return the number of token lists in ``texts`` that contain ``word``."""
    return sum(1 for text in texts if word in text)


def count_idf(word, texts):
    """Return ``len(texts) / df``, or 0 when no text contains ``word``."""
    df = count_df(word, texts)
    if df == 0:
        return 0
    return len(texts) / df


def count_tfidf(word, text, texts):
    """Return ``tf * log10(N / df)`` for ``word`` in ``text``.

    The original multiplied log(tf) by log(idf): that product is never
    positive and raises ValueError whenever tf or idf is 0.
    """
    idf = count_idf(word, texts)
    if idf <= 0:
        return 0.0
    return count_tf(word, text) * log(idf, 10)


def keywords(text, texts):
    """Return the ``_KEYWORD_COUNT`` highest-scoring words of ``text`` as
    ``{word: tf-idf}``, best first."""
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    # Highest score first: a keyword is frequent here and rare elsewhere.
    ranked = sorted(dic_tfidf, key=dic_tfidf.get, reverse=True)
    return {el: dic_tfidf[el] for el in ranked[:_KEYWORD_COUNT]}


def main():
    """Print the keywords of every file found under 'wikipedia'."""
    texts = {}
    for root, dirs, files in os.walk('wikipedia'):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
                texts[f] = preprocessing(t.read())
    raw_texts = list(texts.values())
    for t in texts:
        print('\nИзвлекаем ключевые слова для текста {}'.format(t))
        kwords = keywords(texts[t], raw_texts)
        for key in kwords:
            print(key, kwords[key])


if __name__ == '__main__':
    main()