([а-яёА-ЯЁa-zA-Z- ]*)\..*

]*?>(UTC[+-]?\d{1,2}:?\d{0,2})' for ti in text: m = re.search(reg, ti) if m != None: print(m.group(1)) return m.group(1) def record(): r = main() f = open("result.txt","w") f.write("Часовой пояс - " + r) f.close() record() def open_text(): with open('Austen Jane. Pride and Prejudice.txt', "r", encoding = "utf-8") as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip('.,!?-;:“"”''') return arr def isness(word): Ret = 0 if len(word) > 4: if word[-4:] == 'ness': Ret = 1 else: Ret = 0 return Ret def AddInList(word, List, Qn): Yes = 0 for i in range(len(List)): if (List[i] == word): Qn[i] +=1 Yes = 1 if (Yes == 0): List.append(word) Qn.append(1) Inarr = open_text() List = list() Qn = list() for i in range (len(Inarr)): if isness(Inarr[i]) == 1: AddInList(Inarr[i], List, Qn) print('Количество разных сущ. с суффиксом -ness равно: ' + str(len(List))) Max = 0 Ind = 0 for i in range(len(List)): if Qn[i] > Max: Ind = i Max = Qn[i] print('Максимальную частотность имеет слово: ' + List[Ind] + ', с частотностью: ' + str(Qn[Ind])) import os import re def papka(): folder = [f for f in os.listdir('.')if not re.search(r'[0-9]+',f)if os.path.isfile(f)] print(len(folder)) return folder papka() def dop(): arr = [] astr = 0 exist = 0 folder = [f for f in os.listdir('.')] for p in range(len(folder)): for j in range(len(folder[p])): if folder[p][j] == '.': astr = folder[p][0:j] exist = 0 for k in range(len(arr)): if arr[k] == astr: exist = 1 if exist == 0: arr.append(astr) return arr print(dop()) word = input ("Введите слово на кириллице:") i = 0 while i < len(word): if word[i] == 'п' or word[i] == 'о' or word[i] == 'е': print (word[i]) i = i+2 print ("Для завершения работы нажмите ENTER") ENTER = input ('') import re def open_text(): with open('Викинги — Википедия.html', "r", encoding = "utf-8") as f: text = f.read() return text def replacement(): result1 = re.sub('викинг((и|у|е|а(х|м(и)?)?)|о(в|м)?)?[^\w]', 'бурундук\\1', 
open_text()) result2 = re.sub('Викинг((и|у|е|а(х|м(и)?)?)|о(в|м)?)?[^\w]', 'Бурундук\\1', result1) return result2 def record(): r = replacement() f = open("result.txt","w", encoding = "utf-8") f.write(r) f.close() return f record() import re import os import csv def first(): reg = '' for i in os.listdir('.'): if i.endswith('.xhtml'): m = [] with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t: text = t.read() for t in re.findall(reg, text): m.append(t) with open('new_text.txt', 'a', encoding = 'utf-8') as f: f.write(i+'\t'+str(len(m)) + '\n') first() def second(): for i in os.listdir('.'): reg = '' with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t: text = t.read() for t in re.findall(reg, text): if re.search('', text): with open('table.csv', 'a', encoding = 'utf-8') as f: f.write(i+','+re.search('', text)) second() import random def read_words(filename): file = open(filename, "r", encoding = "utf-8") arr = [] for line in file: arr += line.strip().split(', ') file.close() return arr def verb(number): if number == 's': return random.choice(read_words("singular_verbs.txt")) else: return random.choice(read_words("plural_verbs.txt")) def noun(number): if number == 's': return random.choice(read_words("singular_nouns.txt")) else: return random.choice(read_words("plural_nouns.txt")) def clinoun(): return random.choice(read_words("clitic_noun.txt")) def adverb(): return random.choice(read_words("adverb.txt")) def punctuation(): return random.choice(read_words("punctuation.txt")) def verse1(): return clinoun() + ' ' + noun('s') + ' ' + adverb() + ' ' + verb('s') + punctuation() def verse2(): return noun('pl') + ' ' + verb('pl') + ' ' + adverb() + ' ' + clinoun() + punctuation() def verse3(): return noun('s') + ' ' + adverb() + ' ' + clinoun() + ' ' + verb('s') + punctuation() def make_verse(): verse = random.choice([1,2,3]) if verse == 1: return verse1() elif verse == 2: return verse2() else: return verse3() for n in range(4): print(make_verse()) def 
open_text(): with open('green.txt', "r", encoding = "utf-8") as f: text = f.read() arr = text.split('.') return arr def deli(): txt = open_text() for i, w in enumerate(txt): for s in '.,!?-;:“"”''()«»–': txt[i] = txt[i].replace(s, "") return txt def des(): txt = deli() dlina = [x for x in txt if len(x.split()) > 10] return dlina def big(): txt = des() f = [] for i in txt: f += [x for x in i.split() if x[0].isupper()] return f print (big()) file = open("text.txt", "r", encoding = "utf-8") lmin = lmax = len(file.readline()) for line in file: lp = len(line) if lp > 0: if lmin > lp: lmin = lp if lmax < lp: lmax = lp print (lmax / lmin) def open_text_1(): with open('islandcorp.xml', "r", encoding = "utf-8") as f: Line = 0 for i in f: if i != '\n': Line += 1 else: break return Line def record(): with open("result1.txt","w", encoding = "utf-8") as f: f.write(str(open_text_1())) return import re def keys(): with open('islandcorp.xml', "r", encoding = "utf-8") as f: text = f.read() Dic = {} reg = '.*?' 
res = re.findall(reg, text) for i in range(len(res)): if res[i] not in Dic: Dic[res[i]] = 1 else: Dic[res[i]] += 1 return Dic def record1(): with open("result2.txt","w", encoding = "utf-8") as f: a = keys() for key in a: f.write(key + ',' + str(a[key])+ '\n') record() record1() import os import re def main(): Sum = 0 for root, dirs, files in os.walk('.'): for d in dirs: cir = 0 for i in range(len(d)): a = re.search(r'[а-яёЁ А-Я]+',d[i]) if a == None: cir = 1 if cir == 0: Sum += 1 return Sum print(main()) arr =[] word = input("Введите слово: ") while word: arr.append(word) word = input ("Введите слово: ") w = 0 for w in range (len(arr)): if len(arr[w]) > 5: print (arr[w]) print ("Для завершения работы нажмите ENTER") ENTER = input ('') import re def open_text(): with open('txtfind.txt', "r", encoding = "utf-8") as f: text = f.read() text = text.lower() arr = text.split() for i, w in enumerate(arr): arr[i] = arr[i].strip('.,!?-;:“"”''') return arr def find_in_text(): List = list() regex = '\W?(на(((й((д(у(т(ся)?)?|ёшь(ся)?|ёт(ся|е(сь)?)?|ём(ся)?|и|ите(сь)?|я|енный|ены))|ти(сь)?)))|(ш(ёл(ся)?|л(а|и|о)(сь)?|едш(и|(ий|ая|ее)(ся)?)))))\W?' 
words = open_text() for i in range (len(words)): m = re.search(regex, words[i]) if m != None: List.append(words[i]) return List uList = list() List = find_in_text() for i in range(len(List)): Include = 0 for j in range(len(uList)): if uList[j] == List[i]: Include = 1 if Include == 0: print(List[i]) uList.append(List[i]) word = ('abracadabra') i=0 while i <= len(word): print (word[0:i]) i = i+1 import random def get_words(fn): words = {} with open(fn, 'r') as fd: for line in fd: word, collocations = line.split(',', 1) words[word] = collocations.replace(word, '.'*len(word)).split(',') return words def ask_riddle(words_dict): words = list(words_dict.keys()) rnd_word = random.choice(words) rnd_collocation = random.choice(list(words_dict[rnd_word])) print(rnd_collocation) word = input('Пропущенное слово:') return rnd_word, word == rnd_word def main(): words = get_words('f3.csv') word, answer = ask_riddle(words) print('И это правильный ответ!' if answer else 'Вы ошиблись, правильный ответ: '+ word) return word, answer main() import re def main(): s = '' f = open("Викинги.html","r",encoding="utf-8") for line in f: line = re.sub("в(и|и́)кинг(а(ми?|х)?|о(в|м)|у|е|и)?[^\w]","бурундук\\2",line) line = re.sub("В(и|и́)кинг(а(ми?|х)?|о(в|м)|у|е|и)?[^\w]","Бурундук\\2",line) s = s + line f.close() return s def record(): s = main() f = open("results.txt","w",encoding='utf-8') f.write(s) f.close() record() a=int(input('input a number1: ')) b=int(input('input a number2: ')) c=int(input('input a number3: ')) print('\na=',a,'\nb=',b,'\nc=',c) if a*b==c: print('\nПроизведение чисел a и b равно числу c') else: print('\nПроизведение чисел a и b не равно c') if a*c+b==0: print('Число c является решением линейного уравнения a*x+b=0') else: print('Число c не является решением линейного уравнения a*x+b=0') print('\nЧтобы завершить программу, нажмите Enter') ENTER=input('') import random def phrase(): f0 = open("plus1.txt","r",encoding="utf-8") pr1 = f0.read().split() p1 = random.choice(pr1) 
f1 = open("plus2.txt","r",encoding="utf-8") pr2 = f1.read().split() p2 = random.choice(pr2) return p1 + ' ' + p2 def adjective(): f2 = open("adj.txt","r",encoding="utf-8") adj = f2.read().split() return random.choice(adj) def verb(): f3 = open("verb.txt","r",encoding="utf-8") v = f3.read().split() return random.choice(v) def noun(num): f4 = open("sg.txt","r",encoding="utf-8") nounsg = f4.read().split() f5 = open("pl.txt","r",encoding="utf-8") nounpl = f5.read().split() f6 = open("ind.txt","r",encoding="utf-8") nounind = f6.read().split() if num == 'pl': return random.choice(nounpl) if num == 'ind': return random.choice(nounind) return random.choice(nounsg) def punctuation(): puncts = [".", "?", "!", "...",";"] return random.choice(puncts) def verse1(): return phrase() + ' ' + noun("sg") + ' ' + noun("pl") + punctuation() def verse2(): return verb() + ', ' + verb() + ' ' + noun("ind") + punctuation() def verse3(): return noun("sg") + ' ' + adjective() + ' ' + noun("pl") + punctuation() def doit(): verse = random.choice([1,2,3]) if verse == 1: return verse1() elif verse == 2: return verse2() else: return verse3() for n in range(4): print(doit()) import os import re def texts(name): f = open(name, 'r') text = f.read() x = re.findall('.+', text) f.close() return x def resutls(s,fname): f = open(fname,"w",encoding = "utf-8") f.write(s) f.close() def words(): s = "" for roots, dirs, files in os.walk('.'): for file in files: if file.endswith('.xhtml'): s = s + file + "\t"+ str(len(texts(os.path.join(roots,file)))) + "\n" results(s,"result1.txt") if __name__ == '__words__': words() import os def main(): num = 0 for root, dirs, files in os.walk('.'): for d in dirs: k = 0 for i in d: if i not in "йцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪЭЖДЛОРПАВЫФЯЧСМИТЬБЮ": k += 1 if k == 0: num += 1 return num if __name__ == '__main__': print(main()) import random def words(): f = open("1.csv","r",encoding="utf-8") a = f.read().split(',') m = [] for n in a: b = 
n.rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ m.append(b) return m def d(): m = words() d = {} for i in m: a = i.split() d[a[0]] = a[1] return d def rand(): m = words() di = d() mas = [] for n in di.keys(): mas.append(n) v = random.choice(mas) return v def attempt(): di = d() v = rand() j = 0 for i in di[v]: j += 1 print(v,'.'*j) s = input() if s == di[v]: result = "you win" else: result = "you lose" return result print(attempt()) s=input("введите слово: ") i=0 for letter in s: if (i+1)%2!=0 : if s[i]=='о' or s[i]=='п' or s[i]=='е': print(s[i]) i=i+1 print('\nЧтобы завершить программу, нажмите Enter') ENTER=input('') import re def text(): a=[] f = open("Санкт-Петербург.html","r",encoding="utf-8") for line in f: a.append(line) return a def main(): a=text() s='' p1 = int; p2 = int regex = '"[A-Z][A-Z][A-Z](\+|-)?[0-9][0-9]?:?[0-90-9]?"' for line in a: b=line.split() for i in b: res = re.search(regex,i) if res != None: p1 = i.find('>') p2 = i.find('<') s=i[p1+1:p2] return s def record(): s=main() f = open("result.txt","w") f.write("Часовой пояс - "+s) f.close() record() def names(): import os m = os.listdir('.') return m def main(): m = names() newm = [] num = 0 for i in m: k = 0 for j in i: if j in '1234567890': k += 1 if k == 0: num += 1 if '.' 
in i: i = i[:i.index('.')] if i not in newm: newm.append(i) print('num = {}'.format(num)) print(newm) if __name__ == '__main__': main() def text(): f = open("ness.txt","r",encoding="utf-8") a = f.read().split() m = [] for n in a: b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ m.append(b) return m def ness(m): mas = [] s = "" for i in m: if i[-4:] == 'ness': if i not in s: mas.append(i) s = s + i + " " return mas def numb(): m = text() mas = ness(m) return len(mas) def main(): m = text() b = ness(m) mas2 = [] fr = "" s = "" for i in m: if i[-4:] == 'ness': s = s + i + " " for n in b: mas2.append(s.count(n)) maxi = mas2[0] for j in mas2: if j > maxi: maxi = j for n in b: if s.count(n) == maxi: fr=fr+" "+n return fr print("Количество разных слов на -ness =",numb(),"\nСамое(ые) частотное(ые) -",main()) import re def lines(): f = open('vim4.txt','r',encoding='utf-8') a = f.read() c = re.split(r'[.?!]',a) lines = [' '.join([word.strip('.,<>/?""-=_+''""[]{}()*&^%$ return lines def main(): sents = lines() results = [] for line in sents: k = '' k = ['+' for w in line.split()] if len(k) > 10: for w in line.split(): if w.istitle() == True: results.append(w) return results if __name__ == '__main__': print(main()) import re def text(): f = open("portrait.txt","r",encoding="utf-8") a = f.read().split() m = [] for n in a: b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$ m.append(b) return m def main(): m = text() regex = 'на(й|ш(е|ё)?)(т|д|л)(ш|енн?)?(а?я?|(и|о|ы|(е|ё)|ую?)?(т|шь)?(ся)?(м(у|и)?|го|е|й|х)?)?' 
s = '' for i in m: res = re.search(regex,i) if res != None: k = 0 for j in i: if j not in regex: k += 1 if k == 0: if i not in s: s = s + i + ' ' return s print(main()) import re def opp(): k = 0 f = open("it.xml","r",encoding="utf-8") for line in f: k += 1 f.close() return k def record1(): f = open('result1.txt','w',encoding='utf-8') f.write(str(opp())) f.close() record1() def dic(): d = {} regex1 = 'lemma="' regex2 = 'type="[a-zþ0-9]+"' f = open("it.xml","r",encoding="utf-8") for line in f: if re.search(regex1,line) != None: res = re.search(regex2,line) if res != None: p1 = line.rfind('"') p2 = line.find('type=') s = line[p2+6:p1] if s in d.keys(): d[s] += 1 else: d[s] = 1 return d def record2(): d = dic() f = open('result1.txt','a',encoding='utf-8') for i in d.keys(): f.write('\n'+i) f.close() record2() def plur(): d = {} regex1 = 'lemma="' regex2 = 'type="[a-zþ0-9]+"' f = open("it.xml","r",encoding="utf-8") for line in f: if re.search(regex1,line) != None: res = re.search(regex2,line) if res != None: p1 = line.rfind('"') p2 = line.find('type=') s = line[p2+6:p1] if s[0] == 'l' and s[2] == 'f': if s in d.keys(): d[s] += 1 else: d[s] = 1 return d def record3(): d = plur() f = open('result2','w',encoding='utf-8') for i in d.keys(): f.write(i+' - '+str(d[i])+'\n') f.close() record3() f=open("new1.txt","r",encoding = "utf-8") mx=mn=len(f.readline()) for line in f: if line != "\n": if len(line) > mx: mx = len(line) if len(line) < mn: mn = len(line) print(mx/mn) f.close() f=open("text1.txt","r",encoding = "utf-8") for line in f: sym=line.split(" ") if sym[2]=="союз": print(line) f.close() f=open("text1.txt","r",encoding = "utf-8") s = input("Введите слово: ") m = [] while s!='': m.append(s) s=input("Введите слово: ") for i in m: for line in f: sym = line.split(" ") if i == sym[0]: print(i,sym[1:]) else: print(i+" - в словаре нет такого слова") break f.close() f=open("text1.txt","r",encoding = "utf-8") s=0 for line in f: sym=line.split(" ") if sym[4]=="ед" and 
sym[5]=="жен": print(sym[0]+",") s=s+float(sym[-1]) print(s) f.close() m=[] s=input('введите слово: ') while s!='': m.append(s) s=input('введите слово: ') for word in m: if len(word)>5: print(word) print('Чтобы завершить программу, нажмите ENTER') ENTER=input('') word=input('введите слово: ') newword='' for letter in word: newword=newword+letter print(newword) import re import os def countsent(file): sent = 0 s = open (file,'r') lines = s.readlines() for line in lines: if re.search('',line): sent = sent + 1 return sent def file_countsent(): cw = open ('countsent.txt','w',encoding='utf-8') for root, dirs, files in os.walk('news'): for f in files: cw.write(f+'\t'+str(countsent(os.path.join(root, f)))+'\n') def text_data(txt1): topic = re.search(r'', txt1).group(1) author = re.search(r'', txt1).group(1) data = [author, topic] return data def csv(data, name): with open(name, 'a', encoding='cp1251') as f: f.write(data[2]+'\t'+data[0]+'\t'+data[1]+'\n') def supertable(): data1 = [] for root, dirs, files in os.walk('news'): for f in files: with open(os.path.join(root, f), 'r', encoding='cp1251') as m: txt = m.read() data = text_data(txt) data.append(f) data1.append(data) for data in data1: csv(data, 'supertable.csv') file_countsent() supertable() import re def openfile(): file1 = input('Введите путь к файлу: ') with open(file1, "r", encoding="utf-8") as f: arr = [] lines = f.readlines() for line in lines: if line.strip() == '': break else: arr.append(line) print('Число строк заголовка', len(arr)) def dictionary(): file2 = input('Введите путь к файлу: ') with open(file2, "r", encoding="utf-8") as f: dictn = {} text = f.read() findtype = re.findall(r'type="\w+">', text) for i in findtype: i = i[6::].strip('">') if i not in dictn: dictn[i] = 1 else: dictn[i] += 1 file3 = input('Введите путь к файлу, куда будет записана информация из словаря: ') with open(file3, "r", encoding="utf-8") as f: for key in dictn: f.write(str(key, dictn[key])) openfile() dictionary() with 
open('ugadaika.csv', 'r', encoding = 'utf-8') as f: words = [] a = f.read() words = a.split(',') dic = {} for i, word in enumerate(words): if i%2 == 0: dic[word] = words[i+1] print('Я хочу сыграть с тобой в одну игру... Какое слово я загадал? Количество точек равно количеству букв в слове.') for key in dic: print(dic[key]) b = input() if b == key: print('Молодчинка!!!') else: print ('Ты не очень умный, я загадал не это.') mylist = [] with open('proga.txt', 'r', encoding='utf-8') as f: for line in f.readlines(): x = len(line) mylist.append(x) mini = mylist[0] maxi = mylist[0] for i in mylist: if i <= mini: mini = i if i > maxi: maxi = i print(maxi/mini) import random def adj(): a=[] with open ('adj.txt','r',encoding='utf-8') as f: a=f.read() return random.choice(a.split()) def Petya(): b=[] with open ('nouns_like_Petya.txt','r',encoding='utf-8') as f: b=f.read() return random.choice(b.split()) def kustik(): k=[] with open ('nouns_like_kustik.txt','r',encoding='utf-8') as f: k=f.read() return random.choice(k.split()) def prep(): c=[] with open ('prep.txt','r',encoding='utf-8') as f: c=f.read() return random.choice(c.split()) def adjfem(): d=[] with open ('adjfem.txt','r',encoding='utf-8') as f: d=f.read() return random.choice(d.split()) def nounfem(): e=[] with open ('nounfem.txt','r',encoding='utf-8') as f: e=f.read() return random.choice(e.split()) def verb(): g=[] with open ('verbpf.txt','r',encoding='utf-8') as f: g=f.read() return random.choice(g.split()) def punct(): h=[] with open ('punct.txt','r',encoding='utf-8') as f: h=f.read() return random.choice(h.split()) def verse1(): return adj() + ' ' + Petya() + ' ' + verb() + ' ' + kustik() + punct() def verse2(): return prep() + ' ' + adjfem() + ' ' + nounfem() + punct() def verse3(): return adj() + ' ' + kustik() + ' ' + verb() + ' ' + Petya() + punct() def verse4(): return Petya() + ' ' + verb() + ' ' + nounfem() + punct() def make_verse(): verse = random.choice([1,2,3,4]) if verse == 1: return verse1() elif 
verse == 2: return verse2() elif verse == 3: return verse3() else: return verse4() for n in range(4): print(make_verse()) import os import re nonum = [] num = [] for f in os.listdir('.'): if re.search('[1234567890]', f): num.append(f) else: nonum.append(f) print('Файлов, не содержащих цифр в названии: ', len(nonum)) print('Введите число a и нажмите Enter') a=int(input()) print('Введите число b и нажмите Enter') b=int(input()) print('Введите число c и нажмите Enter') c=int(input()) if a*b==c: print(c ,'является произведением', a,' и ', b) else: print(c ,' не является произведением', a,' и ', b) if c*a==(-1)*b: print(c,'является решением линейного уравнения', a,'x +',b,'= 0') else: print(c,'не является решением линейного уравнения', a,'x +',b,'= 0') b=1 int (b) a=(input()) for i in a: if (b%2)&((i=='о')or(i=='п')or(i=='е')): print (i) b+=1 import re def vikings(): wikifile = input('Время альтернативной истории! Введите имя файла со статьей про викингов: ') with open(wikifile, 'r', encoding = 'utf-8') as f: wikiarticle = f.read() return wikiarticle def change1(wikiarticle): myarticle1 = re.sub('викинг', 'бурундук', wikiarticle) return myarticle1 def change2(myarticle1): myarticle2 = re.sub('Викинг', 'Бурундук', myarticle1) return myarticle2 def chimpunks(myarticle2): newfile = input('Введите имя файла, куда следует поместить измененную статью: ') with open(newfile, 'w', encoding = 'utf-8') as f: f.write(myarticle2) def go(): chimpunks(change2(change1(vikings()))) go() import re def findforms(): find = r"\bна(ш(ёл(ся)?|е(л(ся)?|дш(е(го(ся)?|м(ся|у(ся)?)?|е(ся)?|й(ся)?|ю(ся)?)|ую(ся)?|ая(ся)?|и(й(ся)?|е(ся)?|сь|м(и(ся)?)?|х(ся)?)?))|л(а(сь)?|о(сь)?|и(сь)?))|й(ти(сь)?|д(я(сь)?|у(сь|т(ся)?)?|ё(м(ся)?|шь(ся)?|т(ся|е(сь)?)?|нн(ую|ая|ы(х|е|й|ми?)|о(й|го|о|ю|му?)))|е(шь(ся)?|т(ся|е(сь)?)?|м(ся)?|н(а|о|ы|н((ую|ая|ы(х|е|й|ми?)|о(й|го|о|ю|му?))))?)|и(сь|те(сь)?)?)))\b" arr = [] with open("find.txt", "r", encoding="utf-8") as f: words = f.read() for word in words.split(): p = 
re.search(find, word) if p != None: if word not in arr: arr.append(word) for item in arr: print(item) findforms() a = [] s = str(input("Введите слово ")) while s != (""): if len(s) > 5: a.append(s) s = str(input("Введите слово ")) print('\n'.join(a)) def counting(): with open('isl.txt', 'r', encoding='utf-8') as islen: islen.read() str = islen.readline().replace('\n', '') islenlines = [] islencount = 0 for line in islen: islenlines.append islencount = 0 if '' in line: break print(islencount) counting() def dictionary(): lemmas = [] alsolemmas = [] str = islen.readline for i in range(str): if '(.*?)' links = re.findall(reg, content) return links text = open_html('butterflies.html') links = find_links(text) for link in links[:20]: print(link[1], '-->', link[0]) d = {"Россия":'Москва', "Польша":'Варшава', "США":'Вашингтон', "Болгария":'София', "Армения":'Ереван', "Бразилия":'Бразилиа', "Испания":'Москва'} def delete_doubles(d): arr = [] new = {} for key in d: if d[key] in arr: else: new[key] = key arr.append(d[key]) return a delete_doubles(d) import re def open_html(fname): with open (fname, 'r', encoding='utf-8') as f: text = f.read() return text def tags(text): m = re.sub(r'<.*?>', r'', text) t = re.sub(r'\s+',r' ', m) s = re.sub(r'Илон Маск', r'Маленький котёнок',t) return s print(tags(open_html('musk.html'))) import re rain = r"\b\дожд([ьюи]|е|ей|я(м|ми?)|ях|ём?)?\b" s = input('Введите какое-нибудь слово: ') m = re.search(rain, s) if m != None: print('Это слово является формой слова "дождь"!') else: print('Нетушки!') import codecs def open_file(file_name): f = codecs.open(file_name, 'r', 'utf-8-sig') words = [] for line in f: line = line.strip() words += line.split() for word in words: word = word.strip(u'.,!?:;()\'\"1234567890') word = word.lower() return words def bigramms(words): bi = create_list(words) dic = {} for j in bi: if j not in dic: dic[j] = 1 else: dic[j] += 1 answer = '' answer = [n + '\r\n' for n in dic] print(answer) return answer def 
create_list(words): bi = [] for i in range(len(words)): if i < (len(words) - 1): j = i+1 bi.append(words[i] + words[j]) return bi words = open_file('text.txt') bigramms(words) import re with open('news.txt', 'r', encoding = 'utf-8') as f: text = f.read() punct = '[.,?!:;"\'—@–...«» tabs = '[\t\n]' def preprocessing(text): text = text.strip().lower() text = re.sub(punct, '', text) text = re.sub(tabs, ' ', text) words = text.split() return words words = preprocessing(text) def make_freq(arr): d = {} for el in arr: try: d[el] += 1 except KeyError: d[el] = 1 return d word_freq = make_freq(words) def make_bigrams(arr): bigrams = [] for i in range(len(words)): bigr = arr[i] + ' ' + arr[i + 1] bigrams.append(bigr) return bigrams bigrams = make_bigrams(words) bigrams_freq = make_freq(bigrams) from math import log def count_pmi(x, y): bigr = x + ' ' + y try: p_x = word_freq[x]/len(words) except KeyError: p_x = 0 try: p_y = word_freq[y]/len(words) except KeyError: p_y = 0 try: p_xy = bigrams_freq[bigr]/len(bigrams) except KeyError: p_xy = 0 try: pmi = log(p_xy/(p_x*p_y)) except ZeroDivisionError: pmi = 0 return pmi def calculate_pmi(): pmis ={} for bigr in bigrams: x, y = bigr.split() pmi = count_pmi(x, y) pmis[bigr] = pmi return pmis pmi = calculate_pmi() i = 0 for el in sorted(pmi, key = lambda m: -pmi[m]): if i > 100: break print(el, pmi[el]) i += 1 import os corpus_anek = '' corpus_izvest = '' corpus_teh = '' for root, dirs, files in os. 
walk('texts'): if 'anekdots' in root: for f in files: with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1: text = f1.read() corpus_anek += text if 'teh_mol' in root: for f in files: with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1: text = f1.read() corpus_teh += text if 'izvest' in root: for f in files: with open(os.path.join(root,f), 'r', encoding = 'utf-8') as f1: text = f1.read() corpus_izvest += text print(corpus_teh[:100]) words_anek = preprocessing(corpus_anek) words_teh = preprocessing(corpus_teh) words_izvest = preprocessing(corpus_izvest) words_all = words_anek + words_teh + words_izvest freq_anek = make_freq(words_anek) freq_teh = make_freq(words_teh) freq_izvest = make_freq(words_izvest) freq_all = make_freq(words_all) def count_pmi_cats(word, category): p_word = freq_all[word]/len(words_all) p_cat = 1/3 if category == 'anek': d = freq_anek w = len(words_anek) elif category == 'izvest': d = freq_izvest w = len(words_izvest) elif category == 'teh': d = freq_teh w = len(words_teh) p_word_cat = d[word]/w pmi = log(p_word_cat/(p_word*p_cat)) return pmi for w in words: if i > 100: break try: pmi_anek = count_pmi_cats(w, 'anek') pmi_izvest = count_pmi_cats(w, 'izvest') pmi_teh = count_pmi_cats(w, 'teh') max_pmi = max(pmi_anek, pmi_izvest, pmi_teh) if max_pmi == pmi_anek: print(w, 'anek') elif max_pmi == pmi_izvest: print(w, 'izvest') elif max_pmi == pmi_teh: print(w, 'teh') except KeyError: pass i += 1 import os import re from math import log punct = '[.,!«»?&@"$\[\]:;% tabs = '[\t\n]' def preprocessing(text): text_wo_punct = re.sub(punct, '', text.lower()) text_wo_punct = re.sub(tabs, ' ',text_wo_punct) words = text_wo_punct.strip().split() return words def count_tf(word, text): n = text.count(word) return n / len(text) def count_df(word, texts): i = [True for text in texts if word in text] i = sum(i) return i def count_idf(word, texts): df = count_df(word, texts) try: idf = len(texts) / df except ZeroDivisionError: return 0 return 
idf def count_tfidf(word, text, texts): tf = count_tf(word, text) idf = count_idf(word, texts) tfidf = log(tf, 10) * log(idf, 10) return tfidf def keywords(text, texts): keywords = {} dic_tfidf = {} for word in text: if word in dic_tfidf: continue tfidf = count_tfidf(word, text, texts) dic_tfidf[word] = tfidf i = 0 for el in sorted(dic_tfidf, key = lambda x: dic_tfidf[x]): if i > 5: break i += 1 keywords[el] = dic_tfidf[el] return keywords def main(): texts = {} for root, dirs, files in os.walk('wikipedia'): for f in files: with open(os.path.join(root, f),'r', encoding = 'utf-8') as t: content = t.read() text = preprocessing(content) texts[f] = text raw_texts = list(texts.values()) for t in texts: print('\nИзвлекаем ключевые слова для текста {}'.format(t)) kwords = keywords(texts[t], raw_texts) for key in kwords: print(key, kwords[key]) if __name__ == '__main__': main() print ("Здравствуйте!"\ ) a = int(input("Введите число a: ")) b = int(input("Введите число b: ")) c = int(input("Введите число c: ")) if a + b == c: print ("Числа a и b в сумме дают число c") else: print ("Числа a и b в сумме НЕ дают число c") if c == -b / a: print ("Число c является решением линейного уравнения ax + b = 0") else: print ("Число c НЕ является решением линейного уравнения ax + b = 0") import re import os def folders(): counter = 0 numbers = '[0-9]' titles = os.listdir('.') for i in titles: if os.path.isdir(i) and re.search (numbers, i): counter += 1 return str(counter) def names(): print('Все файлы и(или) папки в текущей папке: ') arr = [] res = '\..+' for i in os.listdir('.'): name = i if os.path.isdir(i): name = re.sub(res, '', i) if name not in arr: arr.append(name) for each in arr: if each: print(each + '\n') else: print('None') print('Количество папок с цифрами в названии в текущей папке: ' + folders()) names() def open_read(): num = 0 with open('F.xml', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: num += 1 return num def write_doc(num): numlines = 
str(num) with open ('Number.txt', 'w', encoding = 'utf-8') as new_doc: new_doc.write(numlines) print('Количество строк: ' + numlines + '\n' + 'Создан документ Number.txt') def main(): write_doc(open_read()) main() import os a = {} def dict_new(): for root, dirs, files in os.walk('.\\news'): for file in files: with open (os.path.join(root, file), 'r', encoding = 'cp1251') as page: raw_text = page.read() a[file] = raw_text.count('(.*?)', text, flags = re.DOTALL) cap = cap.group(3) return cap def write_doc(cap): with open ('Capital.txt', 'w', encoding = 'utf-8') as new_doc: new_doc.write(cap) print('Столица данной страны: ' + cap + '\n' + 'Создан документ Capital.txt') def main(): write_doc(capital(open_read())) main() word = input() text = [] while word: text.append(word) word = input() for i in range(len(text)): new = text[i] new = new[::-1] new = list(new) for t in range (len(new)): if (t + 1) % 3 == 0: new[t] = '' wrd = ''.join(new) print(wrd) with open('Master and Margarita.txt','r', encoding = 'utf-8') as MM: tablewords = [] space = 0 lines = MM.readlines() print(' ', *lines) for i in range(len(lines)): for k in range(len(lines[i])): if lines[i][k] == ' ': space += 1 tablewords.append(space + 1) space = 0 number = 0 for l in range(len(tablewords)): number += tablewords[l] averword = number/len(lines) print('\n','Среднее количество слов в строке =',averword) word = input('Введите слово: ') print(word) for i in range(len(word)): print(word[:-(1+i)]) import random with open('allwords.txt', 'r', encoding = 'utf-8') as aw: lines = aw.readlines() def noun_m1(): noun_m1 = [] noun_m1 = lines[1].split(' ') return random.choice(noun_m1) def noun_f1(): noun_f1 = [] noun_f1 = lines[2].split(' ') return random.choice(noun_f1) def noun_m2(): noun_m2 = [] noun_m2 = lines[3].split(' ') return random.choice(noun_m2) def noun_f2(): noun_f2 = [] noun_f2 = lines[4].split(' ') return random.choice(noun_f2) def noun_mid2(): noun_mid2 = [] noun_mid2 = lines[5].split(' ') return 
random.choice(noun_mid2) def noun_m3(): noun_m3 = [] noun_m3 = lines[6].split(' ') return random.choice(noun_m3) def noun_f3(): noun_f3 = [] noun_f3 = lines[7].split(' ') return random.choice(noun_f3) def noun_mid3(): noun_mid3 = [] noun_mid3 = lines[8].split(' ') return random.choice(noun_mid3) def noun_m4(): noun_m4 = [] noun_m4 = lines[9].split(' ') return random.choice(noun_m4) def noun_f4(): noun_f4 = [] noun_f4 = lines[10].split(' ') return random.choice(noun_f4) def noun_mid4(): noun_mid4 = [] noun_mid4 = lines[11].split(' ') return random.choice(noun_mid4) def noun_m5(): noun_m5 = [] noun_m5 = lines[12].split(' ') return random.choice(noun_m5) def noun_f5(): noun_f5 = [] noun_f5 = lines[13].split(' ') return random.choice(noun_f5) def noun_mid5(): noun_mid5 = [] noun_mid5 = lines[14].split(' ') return random.choice(noun_mid5) def noun_m6(): noun_m6 = [] noun_m6 = lines[15].split(' ') return random.choice(noun_m6) def noun_f6(): noun_f6 = [] noun_f6 = lines[16].split(' ') return random.choice(noun_f6) def noun_mid6(): noun_mid6 = [] noun_mid6 = lines[17].split(' ') return random.choice(noun_mid6) def verb_1(): verb_1 = [] verb_1 = lines[20].split(' ') return random.choice(verb_1) def verb_2(): verb_2 = [] verb_2 = lines[21].split(' ') return random.choice(verb_2) def verb_3(): verb_3 = [] verb_3 = lines[22].split(' ') return random.choice(verb_3) def verb_4(): verb_4 = [] verb_4 = lines[23].split(' ') return random.choice(verb_4) def verb_5(): verb_5 = [] verb_5 = lines[24].split(' ') return random.choice(verb_5) def verb_6(): verb_6 = [] verb_6 = lines[25].split(' ') return random.choice(verb_6) def conj_1(): conj_1 = [] conj_1 = lines[28].split(' ') return random.choice(conj_1) def conj_2(): conj_2 = [] conj_2 = lines[29].split(' ') return random.choice(conj_2) def adj_m1(): adj_m1 = [] adj_m1 = lines[32].split(' ') return random.choice(adj_m1) def adj_m2(): adj_m2 = [] adj_m2 = lines[33].split(' ') return random.choice(adj_m2) def adj_f2(): adj_f2 = [] 
adj_f2 = lines[34].split(' ') return random.choice(adj_f2) def adj_m3(): adj_m3 = [] adj_m3 = lines[35].split(' ') return random.choice(adj_m3) def adj_f3(): adj_f3 = [] adj_f3 = lines[36].split(' ') return random.choice(adj_f3) def adj_mid3(): adj_mid3 = [] adj_mid3 = lines[37].split(' ') return random.choice(adj_mid3) def adj_m4(): adj_m4 = [] adj_m4 = lines[38].split(' ') return random.choice(adj_m4) def adj_f4(): adj_f4 = [] adj_f4 = lines[39].split(' ') return random.choice(adj_f4) def adj_mid4(): adj_mid4 = [] adj_mid4 = lines[40].split(' ') return random.choice(adj_mid4) def adj_m5(): adj_m5 = [] adj_m5 = lines[41].split(' ') return random.choice(adj_m5) def adj_f5(): adj_f5 = [] adj_f5 = lines[42].split(' ') return random.choice(adj_f5) def adj_mid5(): adj_mid5 = [] adj_mid5 = lines[43].split(' ') return random.choice(adj_mid5) def adj_m6(): adj_m6 = [] adj_m6 = lines[44].split(' ') return random.choice(adj_m6) def adj_f6(): adj_f6 = [] adj_f6 = lines[45].split(' ') return random.choice(adj_f6) def adj_mid6(): adj_mid6 = [] adj_mid6 = lines[46].split(' ') return random.choice(adj_mid6) def adv_2(): adv_2 = [] adv_2 = lines[49].split(' ') return random.choice(adv_2) def adv_3(): adv_3 = [] adv_3 = lines[50].split(' ') return random.choice(adv_3) def adv_4(): adv_4 = [] adv_4 = lines[51].split(' ') return random.choice(adv_4) def adv_5(): adv_5 = [] adv_5 = lines[52].split(' ') return random.choice(adv_5) def adv_6(): adv_6 = [] adv_6 = lines[53].split(' ') return random.choice(adv_6) def numeral_m2(): numeral_m2 = [] numeral_m2 = lines[56].split(' ') return random.choice(numeral_m2) def numeral_f2(): numeral_f2 = [] numeral_f2 = lines[57].split(' ') return random.choice(numeral_f2) def numeral_mid2(): numeral_mid2 = [] numeral_mid2 = lines[58].split(' ') return random.choice(numeral_mid2) def numeral_m3(): numeral_m3 = [] numeral_m3 = lines[59].split(' ') return random.choice(numeral_m3) def numeral_f3(): numeral_f3 = [] numeral_f3 = lines[60].split(' ') 
return random.choice(numeral_f3) def numeral_mid3(): numeral_mid3 = [] numeral_mid3 = lines[61].split(' ') return random.choice(numeral_mid3) def numeral_m4(): numeral_m4 = [] numeral_m4 = lines[62].split(' ') return random.choice(numeral_m4) def numeral_f4(): numeral_f4 = [] numeral_f4 = lines[63].split(' ') return random.choice(numeral_f4) def numeral_mid4(): numeral_mid4 = [] numeral_mid4 = lines[64].split(' ') return random.choice(numeral_mid4) def numeral_m5(): numeral_m5 = [] numeral_m5 = lines[65].split(' ') return random.choice(numeral_m5) def numeral_f5(): numeral_f5 = [] numeral_f5 = lines[66].split(' ') return random.choice(numeral_f5) def numeral_mid2(): numeral_mid5 = [] numeral_mid5 = lines[67].split(' ') return random.choice(numeral_mid5) def numeral_f6(): numeral_f6 = [] numeral_f6 = lines[68].split(' ') return random.choice(numeral_f6) def numeral_mid6(): numeral_mid6 = [] numeral_mid6 = lines[69].split(' ') return random.choice(numeral_mid6) def row_1_5(): phrase_of_5_1 =[adj_m1() + ' ' + noun_m4(), adj_m2() + ' ' + noun_m3(), adj_m3() + ' ' + noun_m2(), adj_m4() + ' ' + noun_m1(), numeral_m2() + ' ' + noun_m1() + ' ' + verb_2(), numeral_m2() + ' ' + noun_m2() + ' ' + verb_1(), numeral_m2() + ' ' + noun_m3(), numeral_m3() + ' ' + noun_m1() + ' ' + verb_1(), numeral_m3() + ' ' + noun_m2(), adj_f2() + ' ' + noun_f3(), adj_f3() + ' ' + noun_f2(), adj_f4() + ' ' + noun_f1(), numeral_f2() + ' ' + noun_f1() + ' ' + verb_2(), numeral_f2() + ' ' + noun_f2() + ' ' + verb_2(), numeral_f2() + ' ' + noun_f3(), numeral_f3() + ' ' + noun_f1() + ' ' + verb_1(), numeral_f3() + ' ' + noun_f2(), numeral_mid2() + ' ' + verb_2(), numeral_mid2() + ' ' + noun_mid2() + ' ' + verb_1(), numeral_mid2() + ' ' + noun_mid3(), numeral_mid3() + ' ' + verb_1(), numeral_mid3() + ' ' + noun_mid2(),noun_m5(), noun_f5(), noun_mid5()] return random.choice(phrase_of_5_1) def row_1_7(): phrase_of_7_1 =[adv_2() + ' ' + verb_5(), adv_3() + ' ' + verb_4(), adv_4() + ' ' + verb_3(), 
adv_5() + ' ' + verb_2(), adv_6() + ' ' + verb_1(), adv_2() + ' ' + verb_4() + ' ' + conj_1(), adv_2() + ' ' + verb_3() + ' ' + conj_2(), adv_3() + ' ' + verb_3() + ' ' + conj_1(), adv_3() + ' ' + verb_2() + ' ' + conj_2(), adv_4() + ' ' + verb_2() + ' ' + conj_1(), adv_4() + ' ' + verb_1() + ' ' + conj_2(), adv_5() + ' ' + verb_1() + ' ' + conj_1(), adv_5() + ' ' + conj_2(), adv_6() + ' ' + conj_1()] return random.choice(phrase_of_7_1) def row_2_5(): phrase_of_5_2 =[verb_1() + ' ' + noun_m4(), verb_2() + ' ' + noun_m3(), verb_3() + ' ' + noun_m2(), verb_4() + ' ' + noun_m1(), verb_1() + ' ' + noun_f4(), verb_2() + ' ' + noun_f3(), verb_3() + ' ' + noun_f2(), verb_4() + ' ' + noun_f1(), verb_1() + ' ' + noun_mid4(), verb_2() + ' ' + noun_mid3(), verb_3() + ' ' + noun_mid2()] return random.choice(phrase_of_5_2) def row_2_7(): phrase_of_7_2 =[noun_m1() + ' ' + verb_6(),noun_m2() + ' ' + verb_5(),noun_m3() + ' ' + verb_4(),noun_m4() + ' ' +verb_3(), noun_m5() + ' ' + verb_2(), noun_m6() + ' ' + verb_1(), noun_f1() + ' ' + verb_6(), noun_f2() + ' ' + verb_5(), noun_f3() + ' ' + verb_4(), noun_f4() + ' ' + verb_3(), noun_f5() + ' ' + verb_2(), noun_f6() + ' ' + verb_1(), noun_mid2() + ' ' + verb_5(), noun_mid3() + ' ' + verb_4(), noun_mid4() + ' ' + verb_3(), noun_mid5() + ' ' + verb_2(), noun_mid6() + ' ' + verb_1()] return random.choice(phrase_of_7_2) def row_3_5(): phrase_of_5_3 =[verb_5(), adv_5()] return random.choice(phrase_of_5_3) def haiku(): ready = [row_2_5() + '\n' + row_2_7() + '\n' + row_1_5(), row_3_5() + '\n' + row_2_7() + '\n' + row_3_5(), row_1_5() + '\n' + row_1_7() + '\n' + row_3_5()] return random.choice(ready) print(haiku()) word = input ('give a word') lenghth = len(word) z = 0 newword ='space' while newword != '': newword = '' newword = word[z:lenghth] print(newword) z += 1 lenghth -= 1 import re def sentences(): with open ('text.txt','r',encoding = 'utf-8') as f: text = f.read() m = re.findall('[^.!?]{1,}?[.?!]', text) m= [sent.split() for sent 
in m] for sentence in m: for i in range(len(sentence)): sentence[i] = sentence[i].strip('!?.,;:"').lower() return m def output(m): maxi = max([len(word) for sentence in m for word in sentence]) sentence_number = 0 for sentence in m: sentence_number += 1 print ('предложение №', sentence_number) words = [] for word in sentence: if word not in words: words.append(word) j = 0 for i in range(0, len(sentence) - 1): if word == sentence[i]: j += 1 if j > 1: print('{:^{maxi}} {:^2}'.format(word,j, maxi = maxi)) output(sentences()) import csv import random def open_file(): with open('some.csv', 'r') as f: a =[] reader = csv.reader(f) for line in reader: a.append(line) return a def dictionary(a): d = {} for i in range(0,5): d[a[0][i]] = a[1][i] return d def answer(d,a): word = random.choice(list(d.values())) for key in d: if d[key] == word: print('твоя подсказка:',key) while True: ans = input('введи слово') if ans == word: return random.choice(a[2]) else: print(random.choice(a[3])) print('мы загадали слово для тебя') print(answer(dictionary(open_file()),open_file())) import re def open_text(): words = [] with open('text.txt', 'r', encoding ='utf-8') as f: text = f.read().lower() text = text.split() for item in text: item = item.strip('.,?!-') if item not in words: words.append(item) return words def answer(words): for item in words: m = re.match( r'\bси(д(и(шь|те?|м)?|е(л(о|а|и)?|в(ш(и(й|ми?|е|х)?|е(го|му?|е|й|ю)|ая|ую))?|ть)|я(т|щ(и(й|ми?|е|х)|е(го|му?|е|й|ю)|ая|ую))?)|жу)\b', item) if m != None: print(item) sit = answer(open_text()) quantity = 0 percent = 0 f = open('newy.txt','r',encoding ='utf-8') for line in f: quantity += 1 a = line.split() if len(a) > 5: percent += 1 else: continue a = [] f.close() if percent == 0 or quantity == 0: print(' no lines like this') else: print ('the number of lines:', percent / quantity * 100) import os def walking(): d = {root : len(files) for root, dirs, files in os.walk('.')} maxi = max(d.values()) for key in d: if d[key] == maxi: print 
('there are',maxi,'files in',key) walking() import re def open(): with open('ptitsi.html','r', encoding = 'utf-8') as f: content = f.read() return content def substitute(content): content = re.sub('<.*?>','', content, flags = re.DOTALL) content = re.sub(r'(\n| ){2,}','' ,content, flags = re.DOTALL) content = re.sub('птиц(а(ми?|х)|ы|е(й|ю)?|у)?','рыб\\1', content) content = re.sub('Птиц(а(ми?|х)|ы|е(й|ю)?|у)?','Рыб\\1', content) return content def write(content): with open('text.txt','w', encoding = 'utf-8') as f: f.write(content) print(write(substitute(open())) import random def imperative(): with open('imperatives.txt', 'r',encoding = 'utf-8') as f: imperatives =[] for line in f: newword = line.strip() imperatives.append(newword) return random.choice(imperatives) def noun_acc(): with open('nouns_Acc_Sg&Pl.txt', 'r',encoding = 'utf-8') as f: noun_accs =[] for line in f: newword = line.strip() noun_accs.append(newword) return random.choice(noun_accs) def ins_phrase(): with open('clitics_Ins.txt', 'r',encoding = 'utf-8') as f: clitics = [] for line in f: newword = line.strip() clitics.append(newword) with open('nouns_Ins.txt', 'r',encoding = 'utf-8') as g: noun_inss = [] for line in g: newword = line.strip() noun_inss.append(newword) return random.choice(clitics) + ' ' + random.choice(noun_inss) def noun_pl(): with open('nouns_ Nom=Acc_Pl.txt', 'r',encoding = 'utf-8') as f: noun_pls = [] for line in f: newword = line.strip() noun_pls.append(newword) return random.choice(noun_pls) def noun_sg(): with open('nouns_Nom=Acc_Sg.txt', 'r',encoding = 'utf-8') as f: noun_sgs = [] for line in f: newword = line.strip() noun_sgs.append(newword) return random.choice(noun_sgs) def verb(): with open('verbs_Pl.txt', 'r',encoding = 'utf-8') as f: verbs = [] for line in f: newword = line.strip() verbs.append(newword) return random.choice(verbs) def adverb(): with open('adverbs.txt', 'r',encoding = 'utf-8') as f: adverbs = [] for line in f: newword = line.strip() 
adverbs.append(newword) return random.choice(adverbs) def punctuation(): marks = ['.', '!', '...'] return random.choice(marks) def type1(): return imperative() + ' ' + noun_acc() + punctuation() def type2(): return noun_pl() + ' ' + verb() + punctuation() def type3(): return imperative() + ' ' + ins_phrase() + punctuation() def type4(): return noun_pl() + ' ' + verb() + ' ' + noun_pl() + punctuation() def type5(): return noun_pl() + ' ' + verb() + ' ' + noun_sg() + punctuation() def type6(): return ins_phrase() + ' ' + imperative() + ' ' + noun_sg() + punctuation() def type7(): return imperative() + ' ' + noun_acc() + ' ' + adverb() + punctuation() def tanka(i): line ='' if (i == 1) or (i == 3): line = random.choice([1,2,3]) if line == 1: line = type1() if line == 2: line = type2() if line == 3: line = type3() else: line = random.choice([4,5,6,7]) if line == 4: line = type4() if line == 5: line = type5() if line == 6: line = type6() if line == 7: line = type7() return line def printing(): for i in range(1,6): print(tanka(i)) a = printing() def open_text(text): with open(text, 'r', encoding ='utf-8') as f: text = f.read().lower() words = text.split() return words def percent(words, number): i,j = 0,0 for item in words: if item[0:2] =='un': i+=1 if len(item) > number: j +=1 if i != 0: print('the number of words:', i) return str(round(j / i * 100)) + '%' else: return 'no matching words were found' def questions(): text = input(' Please, enter the name of the text') number = int(input(' Please, enter the lenght')) words = open_text(text) answer = percent(words, number) return answer print('your result is', questions()) n = int(input( )) w = 0 i = 0 while w <= n: w = 2**i i += 1 if w % 2 == 0 and w <= n: print (w) import re def open_text(): with open('archi.html','r', encoding = 'utf-8') as f: text = f.read() return text def search(text): m = re.search(r'title="Коды языков".*?title="ISO (\d\d\d)"',text, flags = re.DOTALL) return m.group(1) def write(z): with 
open('archi.txt','w', encoding = 'utf-8') as f: f.write(z) archi = write(search(open_text())) import os import re def search(): count = 0 a =[] for f in os.listdir(): if os.path.isdir(f) and f not in a: lat = re.search('.*[a-zA-z].*', str(f)) rus = re.search('.*[а-яА-ЯЁё].*', str(f)) if lat != None and rus != None: count+=1 a.append(f) if count == 1: print('1 dir was found', end = '') else: print (count, 'dirs were found ', end ='') if a != [] : print( ':'+', '.join(a)) search() count = 0 arr = ['','','',''] while count < 4: s = input('vvedi slovo') arr [ int(count)] += s s = '' count += 0.5 for i in range (0,4): print(arr[i]) a = int(input('введи а')) b = int(input('введи b')) c = int(input('введи с')) if a / b == c: print('а разделить на b равно с') else: print('а разделить на b не равно с') if a ** b == c: print(' а в степени b равно c') else: print(' а в степени b не равно с') with open ('hw5.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines () list_1 = [] for line in lines: line = line.split() n = len (line) list_1.append (n) sum_list = 0 sum_line = 0 for elem in list_1: if elem > 5: sum_list += 1 sum_line += 1 else: sum_list += 1 percent = (sum_line / sum_list) * 100 print (percent, '% строк содержит больше 5 слов') import re def opentext(text): with open(text, 'r', encoding = 'utf-8') as f: sentences = f.read() text = re.sub('\.(\.\.)?|\?', '!', sentences) list_ = text.split('!') return list_ def text_format(text): text = opentext(text) text1 = [re.sub('( - )|( — )|( ‒ )', ' ', i) for i in text] sents = [sent.split() for sent in text1] sents2 = [[i.strip('.,?!":; sents3 = [[i.lower() for i in sent] for sent in sents2] return sents3 def search(text): sentences = text_format(text) repeated = [[w for w in sent if sent.count(w) > 1] for sent in sentences] return repeated def count(text): a = search(text) b = opentext(text) for i in range(len(a)): if a[i]: print (str(b[i]) + '\n') c = {w : a[i].count(w) for w in a[i]} keys = c.keys() for key in keys: print 
('{:^10}'.format(key) + '{:^10}'.format(c[key])) text = input('Введите название файла: ') count(text) import re def opentext(text): with open(text, 'r', encoding = 'utf-8') as f: text = f.readlines() list_ = [] for line in text: line = line.split() list_.extend(line) words = [] for i in range(len(list_)): a = list_[i] a = a.strip('.,?!"":;*()%$ words.append(a) return words def find_form(): form = 'си((жу)|д((и((шь)|м|(те?))?)|(е((ть)|(л(а|и|о)?)|(в(ш((и(й|е|х|(ми?))?)|(е((го)|(му?)|й|е)?)|(ая)|(ую))))))|(я(щ((и(й|(ми?)|х|е))|(е((го)|(му?)|й|е))|(ая)|(ую)))?)))' form2 = 'буд((ут?)|(е(м|(шь)|(те?))))' words = opentext(text) forms = [] for i in range(len(words)): m = re.search(form, words[i]) if m != None: if words[i] == 'сидеть' and re.search(form2, words[i-1]) != None: form_fut = words[i-1] + ' ' + words[i] if form_fut not in forms: forms.append(form_fut) else: continue else: if words[i] not in forms: forms.append(words[i]) else: continue else: continue return forms text = input('Введите название файла: ') m = find_form() print ('Формы глагола "сидеть", встретившиеся в тексте:') for i in range(len(m)): print (m[i], end = '\n') l = [] for i in range(8): l.append (input()) print (l[0]+l[1]) print (l[2]+l[3]) print (l[4]+l[5]) print (l[6]+l[7]) import os import re def list_files(path): files_list = [] for d, dirs, files in os.walk(path): for f in files: path_f = os.path.join(d, f) files_list.append(path_f) return files_list def open_file(f): with open(f, 'r', encoding = 'utf-8') as k: text = k.readlines() return text def count_sent(path): files = list_files(path) list_sent = {} for f in files: b = re.search('(_.*?.xhtml)', f) f_name = b.group(1) sent = 0 file_text = open_file(f) for line in file_text: if re.search('', line) != None: sent = sent + 1 list_sent[f_name] = sent return list_sent def file_format_sent(path): sent = count_sent(path) with open('task1.txt', 'w', encoding = 'utf-8')as k: for key in sent.keys(): k.write(key + '\t' + str(sent[key]) + '\n') def 
inf(f): text = open_file(f) inf = {} for line in text: author = re.search('content="(.*?)" name="author"', line) if author != None: author1 = author.group(1) for line in text: topic = re.search('content="(.*?)" name="topic"', line) if topic != None: topic1 = topic.group(1) inf[author1] = topic1 return inf def create_csv(path): files = list_files(path) with open('task2.csv', 'w', encoding = 'utf-8') as k: for f in files: infa = inf(f) f_name = re.search('(_.*?.xhtml)', f).group(1) for key in infa.keys(): k.write(str(f_name) + '\t' + str(key) + '\t' + str(infa[key]) + '\n') def pr_loc(f): text = open_file(f) bigrams = [] for i in range(len(text)): pr = re.search('gr="PR"', text[i]) if pr != None: prep = re.search('(.*?)', text[i]).group(1) loc = re.search('"S.*?loc', text[i+1]) if loc != None: S_loc = re.search('(.*?)', text[i+1]).group(1) bigrams.append(prep + ' ' + S_loc) return bigrams def text_without_tegs(f): text = open_file(f) text_w_t = '' for line in text: if re.search('', line) != None: word = re.search('(.*?)', line).group(1) prep = re.search('(.)()?', line) if prep != None: if prep.group(1) == '.' or prep.group(1) == '!' 
or prep.group(1) == '?': text_w_t = text_w_t + ' ' + word + prep.group(1)+'\n' else: text_w_t = text_w_t + ' ' + word + prep.group(1) else: text_w_t = text_w_t + ' ' + word return text_w_t def bigr(path): files = list_files(path) with open('task3.txt', 'w', encoding = 'utf-8') as k: for f in files: for b in pr_loc(f): k.write(b + '\n') path = 'C:\\Users\\1\\Documents\\ниу вшэ\\КИЛИ и программирование\\python\\экзамен\\news' file_format_sent(path) create_csv(path) bigr(path) import random def adjective_Abl_m(): with open('adjective_Abl_verse1_m.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def adjective_Abl_f(): with open('adjective_Abl_verse1_f.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def noun_Abl_m(): with open('noun_Abl_verse1_m.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def noun_Abl_f(): with open('noun_Abl_verse1_f.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def noun_phrase(): with open('prepositions.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() prep = random.choice(line) while prep != 'в' and prep != 'к' and prep != 'с': prep = random.choice(line) if prep == 'в' or prep == 'к': with open('noun_verse1_prep1.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) else: with open('noun_verse1_prep2.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) return prep.title() + ' ' + noun1 def noun_Gen(): with open('noun_Gen_verse1.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def 
verse11(): return adjective_Abl_m().title() + ' ' + noun_Abl_m() def verse12(): return adjective_Abl_f().title() + ' ' + noun_Abl_f() def verse13(): return noun_phrase() + ' ' + noun_Gen() def participle_adj(): with open('participle_adjective_verse2.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def subject(): with open('subject_verse2.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def place(): with open('places_verse2.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split(', ') return random.choice(line) def obj_f(): with open('adjective_obj_verse2_f.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() adj = random.choice(line) with open('object_verse2_f.txt', 'r', encoding = 'utf-8') as k: objects = k.readlines() for obj in objects: obj = obj.split() obj = random.choice(obj) return adj + ' ' + obj def obj_m(): with open('object_verse2_m.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() obj1 = random.choice(line) with open('object_Gen_verse2_m.txt', 'r', encoding = 'utf-8') as k: objects = k.readlines() for obj in objects: obj = obj.split() obj2 = random.choice(obj) with open('adjective_obj_verse2_m.txt', 'r', encoding = 'utf-8') as l: adjectives = l.readlines() for adjective in adjectives: adjective = adjective.split() adj = random.choice(adjective) return adj + ' ' + obj2 + ' ' + obj1 def verse21(): return participle_adj().title() + ' ' + subject() + ' ' + place() + '.' 
def _read_words(filename):
    """Return all whitespace-separated words stored in ``filename``.

    The word lists are read as UTF-8 text.  Words from every line are
    pooled, so a one-line list and a one-word-per-line list behave the same.

    Raises:
        ValueError: if the file contains no words, so callers fail with a
            clear message instead of an unbound-variable error.
    """
    with open(filename, 'r', encoding='utf-8') as source:
        words = source.read().split()
    if not words:
        raise ValueError('no words found in ' + filename)
    return words


def verse22():
    """Return a capitalised random verb from verb_verse2.txt plus ``obj_f()``."""
    verb = random.choice(_read_words('verb_verse2.txt'))
    return verb.title() + ' ' + obj_f()


def verse23():
    """Return a capitalised random verb from verb_verse2.txt plus ``obj_m()``."""
    verb = random.choice(_read_words('verb_verse2.txt'))
    return verb.title() + ' ' + obj_m()


def verb_feel():
    """Return a random word from verb_feelings.txt."""
    return random.choice(_read_words('verb_feelings.txt'))


def verse31():
    """Build the line "<Verb>, <preposition> <noun>".

    A preposition (prepositions.txt) and a noun stem (base_noun_verse3.txt)
    are drawn at random; the ending appended to the stem depends on the
    preposition, and the stems 'мор' / 'солнц' take their own endings.
    The prepositions 'с', 'в' and 'к' are emitted with a leading 'как '.
    """
    prep = random.choice(_read_words('prepositions.txt'))
    base_noun = random.choice(_read_words('base_noun_verse3.txt'))

    if prep in ('под', 'над', 'с'):
        ending = 'ем' if base_noun in ('мор', 'солнц') else 'ом'
    elif prep in ('у', 'от', 'из'):
        ending = 'я' if base_noun == 'мор' else 'а'
    elif prep in ('при', 'на', 'в'):
        ending = 'е'
    else:
        # 'к' and every preposition not listed above share these endings.
        ending = 'ю' if base_noun == 'мор' else 'у'
    noun = base_noun + ending

    if prep in ('с', 'в', 'к'):
        prep = 'как ' + prep
    return verb_feel().title() + ',' + ' ' + prep + ' ' + noun


def verse32():
    """Return "<Participle> <subject>." built from the verse-3 word lists."""
    participle = random.choice(_read_words('participle_verse3.txt'))
    subject = random.choice(_read_words('subject_verse3.txt'))
    return participle.title() + ' ' + subject + '.'


def verse41():
    """Build the line "<Noun> <preposition> <noun> <verb>.".

    The prepositions 'в', 'к' and 'с' are never used here.  The second
    noun's word list is selected by the preposition, the verb's word list
    by the first noun.

    Raises:
        ValueError: if prepositions.txt offers nothing besides 'в'/'к'/'с'
            (re-drawing until another one turns up would never finish).
    """
    noun1 = random.choice(_read_words('noun_verse41_1.txt'))

    # Filter up front instead of re-drawing in a loop: same distribution,
    # but guaranteed to terminate.
    allowed = [word for word in _read_words('prepositions.txt')
               if word not in ('в', 'к', 'с')]
    if not allowed:
        raise ValueError('prepositions.txt has no usable preposition')
    prep = random.choice(allowed)

    if prep in ('под', 'над'):
        noun_file = 'noun_verse41_2.txt'
    elif prep in ('у', 'от', 'из'):
        noun_file = 'noun_verse41_3.txt'
    elif prep == 'при':
        noun_file = 'noun_verse41_4.txt'
    elif prep == 'на':
        noun_file = 'noun_verse41_5.txt'
    else:
        noun_file = 'noun_verse41_6.txt'
    noun2 = random.choice(_read_words(noun_file))

    # These nouns get the first verb list, all others the second
    # (presumably a gender-agreement split; verify against the word lists).
    if noun1 in ('дрожь', 'ночь', 'сталь', 'тень', 'кровь', 'плеть'):
        verb_file = 'verb_verse41_1.txt'
    else:
        verb_file = 'verb_verse41_2.txt'
    verb1 = random.choice(_read_words(verb_file))

    return noun1.title() + ' ' + prep + ' ' + noun2 + ' ' + verb1 + '.'
def noun42(): with open('object_verse42.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() return random.choice(line) def the_end_of_the_line(): with open('prepositions.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() line.append('во' and 'со' and 'ко') line.remove('под') line.remove('у') line.remove('от') line.remove('по') line.remove('из') prep = random.choice(line) if prep == 'во': with open('noun_verse42_1.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) elif prep == 'со': noun = 'мной' elif prep == 'ко': with open('noun_verse42_2.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) elif prep == 'при' or prep == 'на': if noun42() == ('плач' or 'крик' or 'стон' or 'зов' or 'стан' or 'взгляд' or 'прах' or 'плен' or 'хлад'): with open('noun_verse42_3.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) while noun2 == 'ней': noun2 = random.choice(noun) else: with open('noun_verse42_3.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) while noun2 == 'нем': noun2 = random.choice(noun) elif prep == 'в': with open('noun_verse42_4.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) elif prep == 'с': with open('noun_verse42_5.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) elif prep == 'к': with open('noun_verse42_6.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) else: if noun42() == ('плач' or 'крик' or 'стон' or 'зов' or 'стан' or 'взгляд' or 'прах' or 
'плен' or 'хлад'): with open('noun_verse42_7.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) while noun2 == 'ней': noun2 = random.choice(noun) else: with open('noun_verse42_7.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun2 = random.choice(noun) while noun2 == 'нем': noun2 = random.choice(noun) return prep.title() + ' ' + noun2 def verse42(): with open('pronoun_verse4.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() pronoun = random.choice(line) return verb_feel().title() + ' ' + noun42() + ' ' + pronoun + '... ' + the_end_of_the_line() def verse51(): with open('pronoun_verse5.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() pronoun = random.choice(line) if pronoun == 'вся' or pronoun == 'та': with open('adjective_verse5_f_4.txt', 'r', encoding = 'utf-8') as k: adjectives = k.readlines() for adjective in adjectives: adjective = adjective.split() adj = random.choice(adjective) with open('noun_verse5_f.txt', 'r', encoding = 'utf-8') as l: nouns = l.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) elif pronoun == 'весь' or pronoun == 'тот': with open('adjective_verse5_m_3.txt', 'r', encoding = 'utf-8') as k: adjectives = k.readlines() for adjective in adjectives: adjective = adjective.split() adj = random.choice(adjective) with open('noun_verse5_m.txt', 'r', encoding = 'utf-8') as l: nouns = l.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) else: with open('adjective_verse5_f_3.txt', 'r', encoding = 'utf-8') as k: adjectives = k.readlines() for adjective in adjectives: adjective = adjective.split() adj = random.choice(adjective) with open('noun_verse5_f.txt', 'r', encoding = 'utf-8') as l: nouns = l.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) 
return pronoun.title() + ' ' + adj + ' ' + noun1 + '.' def verse52(): with open('parenthesis_verse5.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() for line in lines: line = line.split() parenthesis = random.choice(line) with open('noun_verse52.txt', 'r', encoding = 'utf-8') as k: nouns = k.readlines() for noun in nouns: noun = noun.split() noun1 = random.choice(noun) if noun1 == 'звезда' or noun1 == 'вуаль' or noun1 == 'туман': with open('verb_verse52_sg.txt', 'r', encoding = 'utf-8') as l: verbs = l.readlines() for verb in verbs: verb = verb.split() verb1 = random.choice(verb) else: with open('verb_verse52_pl.txt', 'r', encoding = 'utf-8') as l: verbs = l.readlines() for verb in verbs: verb = verb.split() verb1 = random.choice(verb) return parenthesis.title() + ' ' + noun1 + ' ' + verb1 + '?!' def poem(): variant = random.choice([1, 2, 3, 4, 5, 6]) if variant == 1: var = random.choice([1, 2]) if var == 1: return verse11() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52() else: return verse12() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52() elif variant == 2: var = random.choice([1, 2]) if var == 1: return verse13() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() else: return verse13() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() elif variant == 3: var = random.choice([1, 2, 3, 4]) if var == 1: return verse11() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() elif var == 2: return verse12() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() elif var == 3: return verse11() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() else: return verse12() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() elif variant ==4: return verse13() + '\n' + verse21() + '\n' + verse31() + '\n' + verse41() + '\n' + verse52() elif variant == 5: var = 
random.choice([1, 2]) if var == 1: return verse13() + '\n' + verse22() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() else: return verse13() + '\n' + verse23() + '\n' + verse32() + '\n' + verse41() + '\n' + verse52() else: var = random.choice([1, 2, 3, 4]) if var == 1: return verse11() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() elif var == 2: return verse12() + '\n' + verse22() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() elif var == 3: return verse11() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() else: return verse12() + '\n' + verse23() + '\n' + verse32() + '\n' + verse42() + '\n' + verse51() print (poem()) import re def open_file(): with open('Птицы.html', 'r', encoding = 'utf-8') as f: text = f.read() return text def sub_word(): word1 = '\\bпти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b' word2 = '\\bПти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b' s = re.sub(word1, 'рыб\\1', open_file()) m = re.sub(word2, 'Рыб\\1', s) return m def add_file(): with open('Замена.html', 'w', encoding = 'utf-8') as k: k.write(sub_word()) return k add_file() def data (year, month, day): if month > 12: return False else: if day >= 31: return False else: if day == 31 and (month == 2 or month == 4 or month == 9 or month == 11 or month == 6): return False else: if day == 30 and month == 2: return False else: if day == 29 and month == 2 and (year % 4 != 0 or (year % 100 == 0 and year % 1000 != 0)): return False elif day == 16 and month == 12 and year == 1998: print ("Вы угадали день рождения разработчика! 
Не забудьте его поздравить :)") else: return True year = input ("Введите год (натуральное число): ") month = input ("Введите месяц (натуральное число до 12 включительно): ") day = input ("Введите день (натуральное число до 31 включительно): ") while year and month and day: if data (int(year), int(month), int(day)) == True: print ("Такая дата есть в календаре:)") elif data (int(year), int(month), int(day)) == False: print ("Простите, но такой даты нету:(") else: print (data (int(year), int(month), int(day))) print ("Попробуем снова:)") year = input ("Введите год (натуральное число): ") month = input ("Введите месяц (натуральное число до 12 включительно): ") day = input ("Введите день (натуральное число до 31 включительно): ") print ("Все!:)") a = int (input ()) b = int (input ()) c = int (input ()) s = (a + 1) // 2 + (b + 1) // 2 + (c + 1) // 2 print (s) print (os.path.abspath('.')) print (os.getcwd()) os.path.join('texts', '1.txt') os.path.exists('texts') print (os.listdir('.')) s = 'hello' i = 1 texts = [f for f in os.listdir('.') if f.endswith('.txt')] print (texts) for f in os.listdir('.'): if f.endswith('.txt'): with open(f, 'a', encoding = 'utf-8') as w: w.write (s*i) i += 1 os.mkdir('corpus1') os.makedirs('a\\b\\long\\long') os.rename('texts\\1.txt', 'texts\\2.txt') os.path.isfile(r'texts\corpus1.txt') os.path.isdir(r'texts') shutil.copy(r'texts\2.txt', r'new_corpus\2.txt') shutil.move('откуда', 'куда') shutil.copytree('папка', 'папка2') os.remove(r'new_corpus\2.txt') shutil.rmtree('corpus') def align_right(arr): for i in arr: print ('{:>40}'.format(i)) arr = ['abba', 'assa', 'adda', 'affa'] align_right(arr) def tokenize(text): tokens = text.split() tokens1 = [t.strip('.,?!":;*()-— ') for t in tokens] tokens2 = [t.lower() for t in tokens1] return tokens2 text = 'Инициатива публикации лучших дисциплин исходила в том числе от Студсовета. 
Чуть ранее представители Студенческого совета получили возможность использовать результаты СОП при обсуждении возникающих проблем и спорных моментов. Теперь все студенты смогут использовать опубликованную информацию — агрегированное мнение своих предшественников — при формировании собственной индивидуальной образовательной траектории.' print(tokenize(text)) def tabulate(a): for i in range(0, len(a)): print('{:<10}'.format(a[i][0]) + '{:^10}'.format(a[i][1]) + '{:>10}'.format(a[i][2])) a = [('кошки','собаки','коровы'), ('мяу','гав','му'), (3,3,2)] tabulate(a) x = int (input ()) if x > 0: sign = 1 elif x < 0: sign = -1 else: sign = 0 print (sign) a = int (input ()) b = int (input ()) if a < b: print (a) else: print (b) x = int (input ('введите целое число x = ')) print ('вы ввели число', x) res = x*55/100+33 print ('результат вычислений x * 55 / 100 + 33 =', res) a = int (input ('введите длину первого катета a = ')) b = int (input ('введите длину второго катета b = ')) S = a * b / 2 print (S) import re def func1(regw, word1): word = input('Введите слово: ') m = re.search(regw, word) if m != None: return 'Данное слово является формой слова ' + word1 else: return 'Данное слово не является формой слова ' + word1 word1 = 'свобода' regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b' def if_any(s, regw): m = re.search(regw, s) s = s.split() p = [] for i in range(len(s)): m = re.search(regw, s[i]) if m != None: p = p.append(s[i]) else: continue return 'Слово встречается в тексте ' + len(p) + ' раз' s = 'Свободу попугаям!' print(if_any(s, regw)) import re import os import shutil import re def make_folders_sent(s): sent = s.split() b = '\\'.join(sent) os.makedirs(b) s = input('Пожалуйста, введите предложение (без знаков препинания!) 
\n') make_folders_sent(s) def make_folders_num(n): for i in range(1,n+1): os.mkdir(str(i)) for a in range(i): name = str(i) + '\\' + str(a+1) + '.txt' file = open(name, 'w', encoding = 'utf-8') file.write('Hello!') n = int(input('Пожалуйста, введите натуральное число \n')) make_folders_num(n) def count(): filelist = [f for f in os.listdir('.') if os.path.isfile(f)] exts = [] for f in filelist: ext = f.split('.')[-1] exts.append(ext) c = {e : exts.count(e) for e in exts} keys = c.keys() for key in keys: print('{:^10}'.format(key) + '{:^10}'.format(c[key])) count() name = input ('Введите ваше имя: ') age = input ('Сколько вам лет? ') colour = input ('Какой ваш любимый цвет? ') music = input ('Кто ваш любимый музыкальный исполнитель? ') dream = input ('Какова ваша заветная мечта? ') with open ('information.txt', 'w', encoding = 'utf-8') as f: f.write ('Информация о соседе\n') f.write (name + '\n' + age + '\n' + colour + '\n' + music + '\n' + dream) with open('Austen_Jane_Pride_and_Prejudice.txt', 'r', encoding = 'utf-8') as f: text = f.readlines() list_ = [] for line in text: line = line.split() list_.extend(line) print (list_) import re with open ('freq.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines () for line in lines: if 'союз' in line: print (line) with open ('freq.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines () a = [] for line in lines: line = line.split () if 'жен' in line and 'ед' in line: print (line[0], end = ', ') a.append (line[-1]) ipm_sum = 0 for elem in a: elem = float (elem) ipm_sum += elem print (ipm_sum) with open ('freq.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines () word = input () while word: for line in lines: line = line.split() if word in line: print ('Морфологическая информация: ' + ' '.join (line[2:-2])) print ('IPM = ' + line[-1]) word = input () import random with open ('words.txt', 'r', encoding = 'utf-8') as f: lines = f.readlines() random.shuffle(lines) score = 0 for line in lines: line = line.strip () 
word, hint = line.split(' ', 1) response = input ('Какое слово я загадала?\n ' + 'Подсказка: ' + hint + ' ') if response == word: print ('Правильно, молодец!') score += 1 else: print ('А вот и нет, слово было ', word) with open ('scores.txt', 'w', encoding = 'utf-8') as n: percent = score / 5 * 100 n.write('Вот результат\n') n.write(str(percent) + '%') import re def func1(regw, word1): word = input('Введите слово: ') m = re.search(regw, word) if m != None: return 'Данное слово является формой слова ' + word1 else: return 'Данное слово не является формой слова ' + word1 word1 = 'свобода' regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b' def if_any(s, regw): m = re.search(regw, s) s = s.split() p = [] for i in range(len(s)): m = re.search(regw, s[i]) if m != None: p = p.append(s[i]) else: continue return 'Слово встречается в тексте ' + len(p) + ' раз' s = 'Свободу попугаям!' print(if_any(s, regw)) import re def opentext(a): with open(a, 'r', encoding = 'utf-8') as f: text = f.read() return text def delete_tags(): s = re.sub ('<.*?>', '', opentext(name), flags = re.DOTALL) return s def delete_odd(): s = re.sub ('(\\s)+', '\\1', delete_tags()) return s name = input('Введите название файла: ') print (delete_odd()) n = int (input ()) hour = n // 60 if hour >= 24: k = hour // 24 hour = hour - k * 24 minute = n % 60 print (hour, minute) import re def opentext(a): with open (a, 'r', encoding = 'utf-8') as f: content = f.read() return content def find_all_links(): reg = r'(.*?)' links = re.findall(reg, opentext(a)) return links a = input('Введите название файла: ') def pictures(): reg = r'

(.*?)

' pictures = re.findall(reg, opentext(a)) return pictures pictures = pictures() print ('Подписи к картинкам: ') for picture in pictures: print (picture[2]) def opentext(fname): with open(fname, 'r', encoding = 'utf-8') as f: text = f.readlines() for line in text: line = line.split() list_ = [] for i in range (0, len(line)): a = line[i] a = a.lower() a = a.strip('.,?!";:"*()') list_.append(a) return list_ def first_letter(letter): fname = input('введите название файла: ') text = opentext(fname) words_letter = [] for i in range(len(text)): if text[i].startswith(letter) == True: words_letter.append(text[i]) else: continue return words_letter def questions(): letter = input('введите первую букву: ') number = int(input('введите число: ')) words = first_letter(letter) result = [] for i in range(len(words)): if len(words[i]) > number: result.append(words[i]) else: continue return result print (questions()) a = int (input ('введите первое число ')) b = int (input ('введите второе число ')) c = int (input ('введите третье число ')) s = a + b + c print (s) import os def drawtree(): for root, dirs, files in os.walk('C:\\Users\\1\\Documents\\ниу вшэ'): num = root.count('\\') new_root = root.split('\\')[-1] print('\t'*num+'--'+new_root+'\n') for f in files: print((num+1)*'\t'+f) drawtree() name = input () print ('Hello, ' + name + '!') import re def opentext(a): with open(a, 'r', encoding = 'utf-8') as f: text = f.read() return text def delete_tags(): s = re.sub ('<.*?>', '', opentext(name), flags = re.DOTALL) return s def delete_odd(): s = re.sub ('(\\s)+', '\\1', delete_tags()) return s name = input('Введите название файла: ') print (delete_odd()) import re def opentext(a): with open (a, 'r', encoding = 'utf-8') as f: content = f.read() return content def find_all_links(): reg = r'(.*?)' links = re.findall(reg, opentext(a)) return links a = input('Введите название файла: ') def pictures(): reg = r'

(.*?)

' pictures = re.findall(reg, opentext(a)) return pictures pictures = pictures() print ('Подписи к картинкам: ') for picture in pictures: print (picture[2]) num = int (input ()) t = 1 while t * 2 <= num: t = t * 2 print (t) def opentext(fname): with open(fname, 'r', encoding = 'utf-8') as f: text = f.readlines() list_ = [] for line in text: line = line.split() list_.extend(line) words = [] for i in range(len(list_)): a = list_[i] a = a.lower() a = a.strip('.,?!";:"*()') words.append(a) return words def un_forms(): text = opentext(fname) words_un = [] for i in range(len(text)): if text[i].startswith('un') == True: words_un.append(text[i]) else: continue return words_un def quantity(): words = un_forms() return len(words) def percentage(number): words = un_forms() s = 0 for i in range(len(words)): if len(words[i]) > number: s += 1 else: continue result = s / len(words) * 100 return result fname = input('Введите название файла: ') number = int(input('Введите число: ')) print ('Количество слов с приставкой un- равно ', quantity()) print ('Процент слов с приставкой un- длинее ', number, ' равен ', percentage(number)) import os import shutil import re def all_files(): ff = os.listdir('.') file_names = [] for f in ff: if os.path.isfile(f): a = f.split('.') if a[-1].isdigit() or re.search(r'\s', a[-1]) != None: a = '.'.join(a) elif len(a) > 2: a[0] = '.'.join(a[:-1]) name = a[0] file_names.append(name) return file_names def all_dirs(): ff = os.listdir('.') dir_names = [] for f in ff: if os.path.isdir(f): name = f dir_names.append(name) return dir_names def all_without_rep(): names_file = all_files() names_dir = all_dirs() names = names_file + names_dir names_1 = [] for name in names: if name not in names_1: names_1.append(name) return names_1 def out_nice(): names = all_without_rep() print('Список папок и файлов в текущей директории: ') for name in names: print (name) def cyrill_latin_symb_fold(): names = all_dirs() lat = '[a-zA-Z]' cyr = '[а-яА-Я]' cyr_lat_dirs = [name for 
name in names if re.search(lat, name) != None and re.search(cyr, name) != None] return len(cyr_lat_dirs) out_nice() print ('Количество папок, содержащих и латинские, и кириллические символы, равно: ', cyrill_latin_symb_fold()) import random def file(): with open('dictionary.csv', 'r', encoding = 'utf-8') as f: lines = f.readlines() d = {} for line in lines: line = line.split(';') d[line[0]] = line[1].strip('\n') return d def right(): with open('Верные ответы.txt', 'r', encoding = 'utf-8') as f: text = f.read() text = text.split('\n') return random.choice(text) def wrong(): with open('Неверные ответы.txt', 'r', encoding = 'utf-8') as f: text = f.read() text = text.split('\n') return random.choice(text) def zagadka(d): keys = d.keys() keys = list(keys) key = random.choice(keys) print ('Подсказка: ' + key + '...') answer = input('Введите ответ: ') if answer == d[key]: return(right()) else: return(wrong() + ' Верный ответ ' + d[key] + '.') d = file() a = input('Хочешь поиграть? Введи "да" или "нет":)\n') while a == 'да': print(zagadka(d)) a = input('Хочешь сыграть еще раз?:) Введи "да" или "нет"\n') print ('До свидания!') a = int (input ()) b = int (input ()) c = int (input ()) if a / b == c: print (a, "разделить на", b, "равно", c) else: print (a, "разделить на", b, "не равно", c) if a ** b == c: print (a, "в степени", b, "равно", c) else: print (a, "в степени", b, "не равно", c) import os def max_dir(): a = {} for root, dirs, files in os.walk(os.path.abspath('.')): a[root] = len(files) max_v = max(a.values()) if max_v == 1: print('Наибольшее количество файлов (' + str(max_v) +' файл) в директориях: ') elif max_v == 2 or max_v == 3 or max_v == 4: print('Наибольшее количество файлов (' + str(max_v) +' файла) в директориях: ') else: print('Наибольшее количество файлов (' + str(max_v) +' файлов) в директориях: ') for key in a.keys(): if a[key] == max_v: print(key) max_dir() import re def open_file(a): with open(a, 'r', encoding = 'utf-8') as f: text = f.read() return 
text def find_ISO(): reg = 'ISO 639-3(\\w{3})' m = re.search(reg, open_file(a), flags = re.DOTALL) if m: ISO = m.group(2) return ISO else: return 'В статье не указано ISO 639-3' def add_file(): with open('Result.txt', 'a', encoding = 'utf-8') as k: k.write('\n') k.write(a) k.write(': ') k.write(find_ISO()) return k a = input('Введите название статьи в формате Название.html: ') add_file() word = input ('введите слово: ') for i in range (1,len(word)): print (word[i:-i]) with open('dict.csv', 'r', encoding='utf-8') as f: lines = f.readlines() a = dict() for line in lines: line = line.strip('\n') key, value = line.split(':', 1) a[key] = value for key in a: b = input('Угадай слово. Вот подсказка: '+a[key]) if b == key: print('Правильно!') else: t = 0 while b != key and t <= (len(key)-1): b = input('Неправильно, попробуй еще раз: ') t += 1 else: print('Правильно!') import os def greatestway(): depth = [] for root, dirs, files in os.walk('.', topdown=False): a = str(root).count('/') if a not in depth: depth.append(a) return max(depth) print(greatestway()) import re import os def openfile(): for root, dirs, files in os.walk('.\\news2'): for f in files: with open(os.path.join(root, f), 'r', encoding='Windows-1251') as text: file_text = text.read() file_text = re.sub('<.*?>', '', file_text) file_text2 = file_text.split('.') count = len(file_text2) print(f, ' ', count) return def meta(): for root, dirs, files in os.walk('.\\news2'): for f in files: with open(os.path.join(root, f), 'r', encoding='Windows-1251') as text: file_text = text.read() writer = re.match('', file_text).group(1) topic = re.match('', file_text).group(1) with open('.\\table.csv', 'w', encoding='utf-8') as csv_f: heading_string = 'Файл' + ' ' + 'Автор' + ' ' + 'Тема' csv_f.write(heading_string) with open('.\\table.csv', 'a', encoding='utf-8') as csv_writer: string = f + ' ' + writer + ' ' + topic csv_writer.write(string) return print(openfile()) print(meta()) import re def openf(): with open('F.xml', 'r', 
encoding='utf=8') as f: lines = f.readlines() return lines def countli(): lines = openf() linecount = 0 for line in lines: linecount += 1 results = 'result.txt' with open(results, 'w', encoding='utf-8') as n: n.write(str(linecount)) return results def dicfreq(): lines = openf() types = [] for line in lines: l = str(line) if 'lemma' in l: reg = re.search(r'', l) types.append(reg.group(2)) freq = {} for i in range(len(types)): if types[i] not in freq: freq[types[i]] = 1 else: freq[types[i]] += 1 with open('keys.txt', 'w', encoding='utf-8') as te: te.write('\n'.join(freq.keys())) return freq print(countli(), dicfreq()) import re fname = input('Введите название файла: ') def openfile(fname): with open(fname,'r', encoding='utf-8') as f: text = f.read() return text def sentences(): text = openfile(fname) text = text.strip() se = re.split('\\b[.!?\\n]+(?=\\s)', text) return se def find8(): se = sentences() greater7 = [] for i in se: words = i.split(' ') words = [str(w).strip('?!&(),.:;«»\n”“ ') for w in words] greater = [] greater += [w for w in words if len(w) > 7] template = '{} {:->10}' for g in greater: print(template.format(g,len(g))) return print(find8()) import re import os import shutil filename = [] unique = [] name = '' def numberinf(): number = 0 for f in os.listdir('REALEC'): name = str(f) b = re.sub(r'\.\D+', '', name) c = re.search(r'\d', b) if c != None: number += 1 return number def foldername(): for f in os.listdir('REALEC'): name = str(f) b = re.sub(r'\.\D+', '', name) filename.append(b) for n in filename: if n != '' and n not in unique: unique.append(n) return unique print(numberinf(), foldername()) import random with open('words.txt', 'r', encoding='utf-8') as f: lines = f.readlines() for line in lines: line = line.strip() def nm1(): nm1 = list() nm1 = lines[0] nm1 = nm1.strip() nm1 = nm1.split(' ') nm1.remove(nm1[0]) return random.choice(nm1) def nf1(): nf1 = list() nf1 = lines[1] nf1 = nf1.strip() nf1 = nf1.split(' ') nf1.remove(nf1[0]) return 
random.choice(nf1) def nm2(): nm2 = list() nm2 = lines[2] nm2 = nm2.strip() nm2 = nm2.split(' ') nm2.remove(nm2[0]) return random.choice(nm2) def nf2(): nf2 = list() nf2 = lines[3] nf2 = nf2.strip() nf2 = nf2.split(' ') nf2.remove(nf2[0]) return random.choice(nf2) def nm3(): nm3 = list() nm3 = lines[4] nm3 = nm3.strip() nm3 = nm3.split(' ') nm3.remove(nm3[0]) return random.choice(nm3) def nf3(): nf3 = list() nf3 = lines[5] nf3 = nf3.strip() nf3 = nf3.split(' ') nf3.remove(nf3[0]) return random.choice(nf3) def nm4(): nm4 = list() nm4 = lines[6] nm4 = nm4.strip() nm4 = nm4.split(' ') nm4.remove(nm4[0]) return random.choice(nm4) def nf4(): nf4 = list() nf4 = lines[7] nf4 = nf4.strip() nf4 = nf4.split(' ') nf4.remove(nf4[0]) return random.choice(nf4) def nm5(): nm5 = list() nm5 = lines[8] nm5 = nm5.strip() nm5 = nm5.split(' ') nm5.remove(nm5[0]) return random.choice(nm5) def nf5(): nf5 = list() nf5 = lines[9] nf5 = nf5.strip() nf5 = nf5.split(' ') nf5.remove(nf5[0]) return random.choice(nf5) def nm6(): nm6 = list() nm6 = lines[10] nm6 = nm6.strip() nm6 = nm6.split(' ') nm6.remove(nm6[0]) return random.choice(nm6) def nf6(): nf6 = list() nf6 = lines[11] nf6 = nf6.strip() nf6 = nf6.split(' ') nf6.remove(nf6[0]) return random.choice(nf6) def adjm1(): adjm1 = list() adjm1 = lines[12] adjm1 = adjm1.strip() adjm1 = adjm1.split(' ') adjm1.remove(adjm1[0]) return random.choice(adjm1) def adjm2(): adjm2 = list() adjm2 = lines[13] adjm2 = adjm2.strip() adjm2 = adjm2.split(' ') adjm2.remove(adjm2[0]) return random.choice(adjm2) def adjf2(): adjf2 = list() adjf2 = lines[14] adjf2 = adjf2.strip() adjf2 = adjf2.split(' ') adjf2.remove(adjf2[0]) return random.choice(adjf2) def adjm3(): adjm3 = list() adjm3 = lines[15] adjm3 = adjm3.strip() adjm3 = adjm3.split(' ') adjm3.remove(adjm3[0]) return random.choice(adjm3) def adjf3(): adjf3 = list() adjf3 = lines[16] adjf3 = adjf3.strip() adjf3 = adjf3.split(' ') adjf3.remove(adjf3[0]) return random.choice(adjf3) def adjm4(): adjm4 = list() 
adjm4 = lines[17] adjm4 = adjm4.strip() adjm4 = adjm4.split(' ') adjm4.remove(adjm4[0]) return random.choice(adjm4) def adjf4(): adjf4 = list() adjf4 = lines[18] adjf4 = adjf4.strip() adjf4 = adjf4.split(' ') adjf4.remove(adjf4[0]) return random.choice(adjf4) def adjm5(): adjm5 = list() adjm5 = lines[19] adjm5 = adjm5.strip() adjm5 = adjm5.split(' ') adjm5.remove(adjm5[0]) return random.choice(adjm5) def adjf5(): adjf5 = list() adjf5 = lines[20] adjf5 = adjf5.strip() adjf5 = adjf5.split(' ') adjf5.remove(adjf5[0]) return random.choice(adjf5) def adjm6(): adjm6 = list() adjm6 = lines[21] adjm6 = adjm6.strip() adjm6 = adjm6.split(' ') adjm6.remove(adjm6[0]) return random.choice(adjm6) def adjf6(): adjf6 = list() adjf6 = lines[22] adjf6 = adjf6.strip() adjf6 = adjf6.split(' ') adjf6.remove(adjf6[0]) return random.choice(adjf6) def v1(): v1 = list() v1 = lines[23] v1 = v1.strip() v1 = v1.split(' ') v1.remove(v1[0]) return random.choice(v1) def v2(): v2 = list() v2 = lines[24] v2 = v2.strip() v2 = v2.split(' ') v2.remove(v2[0]) return random.choice(v2) def v3(): v3 = list() v3 = lines[25] v3 = v3.strip() v3 = v3.split(' ') v3.remove(v3[0]) return random.choice(v3) def v4(): v4 = list() v4 = lines[26] v4 = v4.strip() v4 = v4.split(' ') v4.remove(v4[0]) return random.choice(v4) def v5(): v5 = list() v5 = lines[27] v5 = v5.strip() v5 = v5.split(' ') v5.remove(v5[0]) return random.choice(v5) def v6(): v6 = list() v6 = lines[28] v6 = v6.strip() v6 = v6.split(' ') v6.remove(v6[0]) return random.choice(v6) def partm3(): partm3 = list() partm3 = lines[29] partm3 = partm3.strip() partm3 = partm3.split(' ') partm3.remove(partm3[0]) return random.choice(partm3) def partm4(): partm4 = list() partm4 = lines[30] partm4 = partm4.strip() partm4 = partm4.split(' ') partm4.remove(partm4[0]) return random.choice(partm4) def partf4(): partf4 = list() partf4 = lines[31] partf4 = partf4.strip() partf4 = partf4.split(' ') partf4.remove(partf4[0]) return random.choice(partf4) def partm5(): 
partm5 = list() partm5 = lines[32] partm5 = partm5.strip() partm5 = partm5.split(' ') partm5.remove(partm5[0]) return random.choice(partm5) def partf5(): partf5 = list() partf5 = lines[33] partf5 = partf5.strip() partf5 = partf5.split(' ') partf5.remove(partf5[0]) return random.choice(partf5) def partm6(): partm6 = list() partm6 = lines[34] partm6 = partm6.strip() partm6 = partm6.split(' ') partm6.remove(partm6[0]) return random.choice(partm6) def partf6(): partf6 = list() partf6 = lines[35] partf6 = partf6.strip() partf6 = partf6.split(' ') partf6.remove(partf6[0]) return random.choice(partf6) def conj1(): conj1 = list() conj1 = lines[36] conj1 = conj1.strip() conj1 = conj1.split(' ') conj1.remove(conj1[0]) return random.choice(conj1) def conj2(): conj2 = list() conj2 = lines[37] conj2 = conj2.strip() conj2 = conj2.split(' ') conj2.remove(conj2[0]) return random.choice(conj2) def numm2(): numm2 = list() numm2 = lines[38] numm2 = numm2.strip() numm2 = numm2.split(' ') numm2.remove(numm2[0]) return random.choice(numm2) def numf2(): numf2 = list() numf2 = lines[39] numf2 = numf2.strip() numf2 = numf2.split(' ') numf2.remove(numf2[0]) return random.choice(numf2) def numm3(): numm3 = list() numm3 = lines[40] numm3 = numm3.strip() numm3 = numm3.split(' ') numm3.remove(numm3[0]) return random.choice(numm3) def numf3(): numf3 = list() numf3 = lines[41] numf3 = numf3.strip() numf3 = numf3.split(' ') numf3.remove(numf3[0]) return random.choice(numf3) def numm4(): numm4 = list() numm4 = lines[42] numm4 = numm4.strip() numm4 = numm4.split(' ') numm4.remove(numm4[0]) return random.choice(numm4) def numf4(): numf4 = list() numf4 = lines[43] numf4 = numf4.strip() numf4 = numf4.split(' ') numf4.remove(numf4[0]) return random.choice(numf4) def numm5(): numm5 = list() numm5 = lines[44] numm5 = numm5.strip() numm5 = numm5.split(' ') numm5.remove(numm5[0]) return random.choice(numm5) def numf5(): numf5 = list() numf5 = lines[45] numf5 = numf5.strip() numf5 = numf5.split(' ') 
numf5.remove(numf5[0]) return random.choice(numf5) def adv2(): adv2 = list() adv2 = lines[46] adv2 = adv2.strip() adv2 = adv2.split(' ') adv2.remove(adv2[0]) return random.choice(adv2) def adv3(): adv3 = list() adv3 = lines[47] adv3 = adv3.strip() adv3 = adv3.split(' ') adv3.remove(adv3[0]) return random.choice(adv3) def adv4(): adv4 = list() adv4 = lines[48] adv4 = adv4.strip() adv4 = adv4.split(' ') adv4.remove(adv4[0]) return random.choice(adv4) def adv5(): adv5 = list() adv5 = lines[49] adv5 = adv5.strip() adv5 = adv5.split(' ') adv5.remove(adv5[0]) return random.choice(adv5) def adv6(): adv6 = list() adv6 = lines[50] adv6 = adv6.strip() adv6 = adv6.split(' ') adv6.remove(adv6[0]) return random.choice(adv6) def random_line_5_1(): sentence5_1 = [adjm4() + ' ' + nm1(), adjm3() + ' ' + nm2(), adjm2() + ' ' + nm3(), adjm4() + ' ' + nm1(), adjf4() + ' ' + nf1(), adjf3() + ' ' + nf2(), adjf2() + ' ' + nf2(), adjf4() + ' ' + nf1(), partm3() + ' ' + nm2(), partm4() + ' ' + nm1(), partf4() + ' ' + nf1(), nm5(), nf5(), numm2() + ' ' + adjm1() + ' ' + nm2(), numm2() + ' ' + adjm2() + ' ' + nm1(), numm3() + ' ' + adjm1() + ' ' + nm1(), numm4() + ' ' + nm1(), numf2() + ' ' + adjf2() + ' ' + nf1(), numf2() + ' ' + nf3(), numf3() + ' ' + nf2()] return random.choice(sentence5_1) def random_line_7_1(): sentence7_1 = [adjm6() + ' ' + nm1(), adjm5() + ' ' + nm2(), adjm4() + ' ' + nm3(), adjm3() + ' ' + nm4(), adjm2() + ' ' + nm5(), adjm1() + ' ' + nm6(), adjf6() + ' ' + nf1(), adjf5() + ' ' + nf2(), adjf4() + ' ' + nf3(), adjf3() + ' ' + nf4(), adjf2() + ' ' + nf5(), partm6() + ' ' + nm1(), partm5() + ' ' + nm2(), partm4() + ' ' + nm3(), partm3() + ' ' + nm4(), partf6() + ' ' + nf1(), partf5() + ' ' + nf2(), partf4() + ' ' + nf3()] return random.choice(sentence7_1) def random_line_7_2(): sentence7_2 = [v6() + ' ' + conj1(), adv2() + ' ' + v4() + ' ' + conj1(), adv3() + ' ' + v3() + ' ' + conj1()] return random.choice(sentence7_2) def random_line_5_2(): sentence5_2 = [v5(), adv2() 
+ ' ' + v3(), adv3() + ' ' + v2(), adv4() + ' ' + v1()] return random.choice(sentence5_2) def random_line_7_3(): sentence7_3 = [adv2() + ' ' + v5(), adv3() + ' ' + v4(), adv4() + ' ' + v3(), adv5() + ' ' + v2(), adv6() + ' ' + v1()] return random.choice(sentence7_3) def poem(): p = [random_line_5_1() + '.\n' + random_line_7_1() + '.\n' + random_line_5_1(), random_line_5_1() + '\n' + random_line_7_2() + '\n' + random_line_5_2(), random_line_5_1() + '\n' + random_line_7_3() + '.\n' + random_line_5_1(), random_line_5_1() + '\n' + random_line_7_3() + ',\n' + random_line_5_2()] return random.choice(p) print(poem()+'.') fname = input('Введите название файла: ') def openfile(fname): with open(fname, 'r', encoding='utf-8') as f: text = f.read() text = text.lower() text = text.strip() words = [] words = text.split(' ') return words def count_words(fname): words = openfile(fname) n = 0 for word in words: word = word.strip('?!@ n += 1 return n def dicff(fname): words = openfile(fname) words.sort() fr = dict() for index in range(len(words)): if words[index] in fr: fr[words[index]] += 1 else: fr[words[index]] = 1 return fr print(count_words(fname), dicff(fname)) import re fname = input('Введите название файла: ') def openfile(fname): with open(fname, 'r', encoding='utf-8') as f: text = f.read() text = text.lower() text = text.strip() words = [] words = text.split(' ') return words def words(fname): words = openfile(fname) a = [] for word in words: word = word.strip('?!@ a.append(word) return a regex = r'\bоткр(ыл[аи]?|о(ют?|е(шь|т|м|те))|ыть)\b' def formsearch(regex): wordlist = words(fname) match = [] for i in wordlist: i1 = str(i) m = re.search(regex, i1) if m != None: match.append(i) strmatch = '\n'.join(match) return strmatch print(formsearch(regex)) fname = input('Введите название файла: ') def openfile(fname): with open(fname, 'r', encoding='utf-8') as f: text = f.read() text = text.lower() text = text.strip() words = [] words = text.split(' ') return words def 
ingform(fname): words = openfile(fname) a = [] for word in words: word = word.strip('?!@ if word.endswith('ing'): a.append(word) else: continue return a theword = input('Введите слово: ') def searching(theword): s = ingform(fname) b = 0 for i in s: if i == theword: b += 1 else: continue return b print(ingform(fname)) print(searching(theword)) import re fname = input('Введите название файла: ') def open_html(fname): with open(fname, 'r', encoding='utf-8') as f: text = f.read() return text def find_capital(fname): text = open_html(fname) card = re.search(r'', text) if card != None: capital = re.search(r'data-wikidata-property-id="P36"(.*?)(.*?)', text) if capital != None: return capital.group(3) def find_country(fname): text = open_html(fname) card = re.search(r'

', text) if card != None: country = re.search(r'>(.*?)', text) if country != None: return country.group(1) print('Страна: ', find_country(fname), 'Столица: ', find_capital(fname)) import re fname = input('Введите название файла: ') def open_html(fname): with open(fname, 'r', encoding='utf-8') as f: text = f.read() te = re.sub(u'<.*?(".*?")?.*?>', u'', text, flags = re.U) te2 = re.sub(u'', u'', te, flags = re.U) te3 = re.sub(u'', u'', te2, flags = re.U) te4 = re.sub(u'.*?', u'', te3, flags = re.U) return te4 def changeform(fname): te = open_html(fname) change1 = re.sub(u'комар(у|е|ы|а(х|м|ми)?|о(м|в))?', u'слон\\1', te, flags = re.U) change2 = re.sub(u'Комар(у|е|ы|а(х|м|ми)?|о(м|в))?', u'Слон\\1', change1, flags = re.U) with open('results.txt', 'w', encoding='utf-8') as n: n.write(change2) return 'Готово! Результаты в файле results.txt .' print(changeform(fname)) import os import re from math import log punct = '[.,!«»?&@"$\[\]:;% tabs = '[\t\n]' def preprocessing(text): text_wo_punct = re.sub(punct, '', text.lower()) text_wo_punct = re.sub(tabs, ' ', text_wo_punct) words = text_wo_punct.strip().split() return words def count_tf(word, text): n = text.count(word) return n / len(text) def count_df(word, texts): i = [1 for text in texts if word in text] i = sum(i) return i def count_idf(word, texts): df = count_df(word, texts) try: idf = len(texts) / df except ZeroDivisionError: return 0 return idf def count_tfidf(word, text, texts): tf = count_tf(word, text) idf = count_idf(word, texts) tfidf = log(tf, 10)*log(idf, 10) return tfidf def keywords(text, texts): keywords = {} dic_tfidf = {} for word in text: if word in dic_tfidf: continue tfidf = count_tfidf(word, text, texts) dic_tfidf[word] = tfidf i = 0 for el in sorted(dic_tfidf, key= lambda x: dic_tfidf[x]): if i > 5: break i += 1 keywords[el] = dic_tfidf[el] return keywords def main(): texts = {} for root, dirs, files in os.walk('wikipedia'): for f in files: with open(os.path.join(root,f), 'r', encoding='utf-8') as 
t: content = t.read() text = preprocessing(content) texts[f] = text raw_texts = list(texts.values()) for t in texts: print('\nИзвлекаем ключевые слова для текста {}'.format(t)) kwords = keywords(texts[t], raw_texts) for key in kwords: print(key, kwords[key]) if __name__ == '__main__': main()