'))
def find_author(text):
    """Return the author field extracted from the file at path ``text``.

    The file is read through ``open_file_as_string``.

    NOTE(review): both the search pattern and the ``lstrip`` argument are
    empty strings in this source, so the result is always ``''`` --
    presumably the original markup was lost; restore both before relying
    on this function.
    """
    text_as_string = open_file_as_string(text)
    # re.search needs the text to search as its second argument.
    match = re.search('', text_as_string)
    if match is None:
        return ''
    return match.group(0).lstrip('')
def find_created(text):
    """Return the creation-date field extracted from the file at ``text``.

    The file is read through ``open_file_as_string``.

    NOTE(review): both the search pattern and the ``lstrip`` argument are
    empty strings in this source, so the result is always ``''`` --
    presumably the original markup was lost; restore both before relying
    on this function.
    """
    text_as_string = open_file_as_string(text)
    # re.search needs the text to search as its second argument.
    match = re.search('', text_as_string)
    if match is None:
        return ''
    return match.group(0).lstrip('')
def main():
    """Write word counts to task1.txt and author/date rows to task2.csv.

    Every file below the ``news`` directory is processed.  The file list is
    collected once because an ``os.walk`` generator can only be consumed a
    single time.
    """
    # Imported locally: in this file the module-level imports appear only
    # after the first call to main().
    import csv
    import os

    paths = [
        os.path.join(root, name)
        for root, dirs, files in os.walk('news')
        for name in files
    ]
    with open('task1.txt', 'w', encoding='cp1251') as task1:
        for path in paths:
            name = os.path.basename(path)
            task1.write(name + '\t' + count_words(path) + '\n')
    # newline='' is what the csv module expects for files it writes to.
    with open('task2.csv', 'w', encoding='cp1251', newline='') as task2:
        writer = csv.writer(task2, delimiter='|', quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        for path in paths:
            name = os.path.basename(path)
            writer.writerow([name, find_author(path), find_created(path)])
if __name__ == '__main__':
main()
import re
import os
import csv
def open_file(xml):
    """Return the lines of the cp1251-encoded file ``xml`` (newlines kept)."""
    with open(xml, mode='r', encoding='cp1251') as source:
        lines = list(source)
    return lines
def open_file_as_string(xml):
    """Return the whole cp1251-encoded file ``xml`` as one string."""
    with open(xml, mode='r', encoding='cp1251') as source:
        return source.read()
def count_words(text):
    """Return, as a string, a count taken over the contents of file ``text``.

    NOTE(review): the substring passed to ``str.count`` is empty here, so the
    result is ``len(contents) + 1`` rather than a word count -- presumably
    the intended marker was lost; confirm and restore it.
    """
    text_as_string = open_file_as_string(text)
    return str(text_as_string.count(''))
def find_author(text):
    """Return the author field extracted from the file at path ``text``.

    The file is read through ``open_file_as_string``.

    NOTE(review): both the search pattern and the ``lstrip`` argument are
    empty strings in this source, so the result is always ``''`` --
    presumably the original markup was lost; restore both before relying
    on this function.
    """
    text_as_string = open_file_as_string(text)
    # re.search needs the text to search as its second argument.
    match = re.search('', text_as_string)
    if match is None:
        return ''
    return match.group(0).lstrip('')
def find_created(text):
    """Return the creation-date field extracted from the file at ``text``.

    The file is read through ``open_file_as_string``.

    NOTE(review): both the search pattern and the ``lstrip`` argument are
    empty strings in this source, so the result is always ``''`` --
    presumably the original markup was lost; restore both before relying
    on this function.
    """
    text_as_string = open_file_as_string(text)
    # re.search needs the text to search as its second argument.
    match = re.search('', text_as_string)
    if match is None:
        return ''
    return match.group(0).lstrip('')
def main():
    """Write word counts to task1.txt and author/date rows to task2.csv.

    Every file below the ``news`` directory is processed.  The file list is
    collected once because an ``os.walk`` generator can only be consumed a
    single time.
    """
    paths = [
        os.path.join(root, name)
        for root, dirs, files in os.walk('news')
        for name in files
    ]
    with open('task1.txt', 'w', encoding='cp1251') as task1:
        for path in paths:
            name = os.path.basename(path)
            task1.write(name + '\t' + count_words(path) + '\n')
    # newline='' is what the csv module expects for files it writes to.
    with open('task2.csv', 'w', encoding='cp1251', newline='') as task2:
        writer = csv.writer(task2, delimiter='|', quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        for path in paths:
            name = os.path.basename(path)
            writer.writerow([name, find_author(path), find_created(path)])
if __name__ == '__main__':
main()
import os
filetree = os.walk('news')
for root, dirs, files in filetree:
for f in files:
print(f)
def new_sentence(sentence):
    """Join the words of ``sentence`` with punctuation stripped.

    ``sentence`` is an iterable of strings.  Each item has leading and
    trailing ``.,;:?!`` removed and is followed by one space; a final ``.``
    closes the result (so the output ends with ``' .'`` when non-empty).
    """
    return ''.join(word.strip('.,;:?!') + ' ' for word in sentence) + '.'
def text_process(text_name):
    """Read the UTF-8 file ``text_name`` and return its normalised sentences.

    ``!``, ``?`` and ``...`` are treated as sentence terminators.  The text
    is split on ``.``, blank fragments are dropped, and every sentence is
    passed word by word through ``new_sentence``.
    """
    with open(text_name, 'r', encoding='utf-8') as f:
        text = f.read()
    for mark in ('...', '!', '?'):
        text = text.replace(mark, '.')
    # Split into sentences first; the words of each one go to new_sentence.
    sentences = [part for part in text.split('.') if part.strip()]
    return [new_sentence(sentence.split()) for sentence in sentences]
def create_dict(text):
    """Map every sentence in ``text`` to a ``{word: length}`` dictionary.

    ``text`` is an iterable of sentence strings.  Words are the
    whitespace-separated tokens of a sentence with surrounding ``.,;:?!``
    removed; tokens that are pure punctuation are skipped.
    """
    dictionary = {}
    for sentence in text:
        words = (token.strip('.,;:?!') for token in sentence.split())
        dictionary[sentence] = {word: len(word) for word in words if word}
    return dictionary
def main():
text = text_process('text.txt')
return(create_dict(text))
main()
import random
def _read_lines(path):
    """Return the stripped lines of the text file at ``path``."""
    with open(path, 'r') as source:
        return [line.strip() for line in source]


# Word lists used by the verse builders; each file holds one entry per line.
nouns = _read_lines('nouns.txt')
verbs = _read_lines('verbs.txt')
clitics = _read_lines('clitics.txt')
nouns2 = _read_lines('nouns2.txt')
punctuation = _read_lines('marks.txt')
imperative = _read_lines('imperatives.txt')
def verse1():
    """Return a line built as: noun, verb, noun, punctuation mark."""
    parts = (nouns, verbs, nouns, punctuation)
    return ' '.join(random.choice(words) for words in parts)
def verse2():
    """Return a line built as: imperative, noun, clitic, nouns2, mark."""
    # The original referenced the misspelt name ``punctiation``.
    parts = (imperative, nouns, clitics, nouns2, punctuation)
    return ' '.join(random.choice(words) for words in parts)
def verse3():
    """Return a line built as: clitic, nouns2, verb, noun, mark."""
    parts = (clitics, nouns2, verbs, nouns, punctuation)
    return ' '.join(random.choice(words) for words in parts)
def make_verse():
    """Return one line produced by a randomly chosen verse builder."""
    builder = random.choice((verse1, verse2, verse3))
    return builder()
# Print a four-line poem.  make_verse must be called; printing the bare
# name would show the function object instead of a line.
for _ in range(4):
    print(make_verse())
import os
def symbols(s):
    """Return True when every character of ``s`` is an ASCII letter.

    An empty string yields True.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    return all(char in letters for char in s)
def main():
    """Print the current-directory entries whose names are ASCII letters only.

    Each matching name is printed, followed by the number of matches.
    """
    n = 0
    for f in os.listdir('.'):
        if symbols(f):
            n += 1
            print(f)
    print(n)
main()
# Read lines until an empty one, then echo those longer than five characters.
s = input()
l = []
while s != '':
    if len(s) > 5:
        l.append(s)
    s = input()
for i in l:
    print(i)
# Start of the next exercise: the word whose prefixes are printed below.
s = input()
# Print every prefix of s, from the empty string up to the whole word.
for i in range(0, len(s) + 1):
    print(s[0:i])


def text_process(text_name):
    """Return the words of the UTF-8 file ``text_name``.

    The text is split on whitespace and each token has leading and trailing
    ``.,;:?![]{}`` characters removed.
    """
    with open(text_name, 'r', encoding='utf-8') as f:
        text = f.read()
    return [word.strip('.,;:?![]{}') for word in text.split()]
def count_ness(text):
    """Return the words of ``text`` that end in ``ness`` (duplicates kept)."""
    # The suffix must be a string literal; the bare name ``ness`` is undefined.
    return [word for word in text if word.endswith('ness')]
def frequency(word, text):
    """Return how many items of the iterable ``text`` equal ``word``."""
    return sum(1 for item in text if item == word)
def main():
    """Print the count of ``-ness`` words in text.txt and their top frequency.

    The first printed number counts all occurrences of words ending in
    ``ness``; the second is the highest frequency among them (0 when there
    are none).
    """
    text = text_process('text.txt')
    ness_words = count_ness(text)
    words = {word: frequency(word, text) for word in ness_words}
    print(len(ness_words))
    # The original read ``word.values()``; the dictionary is ``words``.
    print(max(words.values(), default=0))
main()
import re
def count_line(path='Test.xml'):
    """Return 1 plus the number of lines before the first ``' \\n'`` line.

    ``path`` is the UTF-8 file to scan (defaults to ``Test.xml``).  When no
    line equals ``' \\n'`` the result is the total line count plus one.

    NOTE(review): the terminator literal looks like a remnant of lost
    markup -- confirm what line is supposed to end the header.
    """
    s = 1
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if line == ' \n':
                break
            s += 1
    return s
def write_in():
with open('Test.txt', 'w', encoding = 'utf-8') as f:
num = count_line()
f.write(str(num))
return
write_in()
def open_text():
with open('Test.xml', 'r', encoding = 'utf-8') as f:
text = f.read()
return text
def phrase():
    """Return a ``{match: count}`` dictionary for the pattern in Test.xml.

    NOTE(review): the pattern ``r'.*?'`` only ever matches empty strings,
    so the result has the single key ``''`` -- presumably the surrounding
    markup of the pattern was lost; restore it.
    """
    from collections import Counter

    text = open_text()
    reg = re.findall(r'.*?', text)
    return dict(Counter(reg))
def write_phrase():
with open('Test1.txt', 'w', encoding = 'utf-8') as f:
d = phrase()
for key in d:
f.write(key + ',' + str(d[key])+ '\n')
return
write_phrase()
def n():
text = open_text()
reg = re.findall(r'(.*?)',text)
return reg
print(n())
word = input('Введите слово')
for i in range(len(word)):
print(word[i::] + word [:i])
# Read three numbers and report (1) whether a leaves remainder c when divided
# by b and (2) whether c solves the linear equation ax + b = 0.
print('Введите число')
a = float(input())
print('Введите число')
b = float(input())
print('Введите число')
c = float(input())
# a % 0 raises ZeroDivisionError, so a zero divisor counts as "no".
if b != 0 and a % b == c:
    print('a даёт остаток c при делении на b')
else:
    print('a не даёт остаток c при делении на b')
if a * c + b == 0:
    print('c является решением линейного уравнения ax + b = 0')
else:
    print('c не является решением линейного уравнения ax + b = 0')
import os
def files(path='.'):
    """Return the most common file extension below ``path``.

    The extension is returned without its leading dot.  Files without an
    extension are ignored.  On a tie, the extension seen first wins.
    Returns None when no file has an extension.
    """
    from collections import Counter

    counts = Counter()
    for root, dirs, names in os.walk(path):
        for name in names:
            ext = os.path.splitext(name)[1]
            if ext:
                counts[ext[1:]] += 1
    if not counts:
        return None
    return counts.most_common(1)[0][0]
print(files())
import random
def _random_word(path):
    """Return a random whitespace-separated word from the UTF-8 file ``path``.

    All lines of the file contribute words.  Raises IndexError when the file
    contains no words.
    """
    with open(path, 'r', encoding='utf-8') as source:
        words = source.read().split()
    return random.choice(words)


def noun_f():
    """Return a random word from 'Существительные_ж.txt'."""
    return _random_word('Существительные_ж.txt')


def noun_m():
    """Return a random word from 'Существительные_м.txt'."""
    return _random_word('Существительные_м.txt')


def noun_number_of():
    """Return a random word from 'Существительные_множественные.txt'."""
    return _random_word('Существительные_множественные.txt')


def adjective_m(word):
    """Return a random word from 'Прилагательные_м.txt' followed by ``word``."""
    return _random_word('Прилагательные_м.txt') + ' ' + word


def adverb():
    """Return a random word from 'Наречия.txt'."""
    return _random_word('Наречия.txt')


def verb_f(subj):
    """Return a random word from 'Глаголы_ж.txt' followed by ``subj``."""
    return _random_word('Глаголы_ж.txt') + ' ' + subj


def verb_m(adv, n):
    """Return ``adv``, a space, ``n`` and a random word from 'Глаголы_м.txt'.

    ``n`` is glued directly to the verb and the result ends with a space.
    """
    return adv + ' ' + n + _random_word('Глаголы_м.txt') + ' '


def verb_inf():
    """Return a random word from 'Глаголы_инф.txt'."""
    return _random_word('Глаголы_инф.txt')


def verb_transitive(obj):
    """Return a relative clause: a 'Глаголы_переход.txt' word plus ``obj``."""
    return ', который ' + _random_word('Глаголы_переход.txt') + ' ' + obj


def verb_imp():
    """Return a random word from 'Глаголы_пов.txt'."""
    return _random_word('Глаголы_пов.txt')


def time():
    """Return a random word from 'Время.txt'."""
    return _random_word('Время.txt')


def pronoun():
    """Return a random word from 'Местоимения.txt'."""
    return _random_word('Местоимения.txt')


def no():
    """Return either the negation particle ``'не '`` or an empty string."""
    return random.choice(['не ', ''])
def random_sentence1():
    """Return an imperative sentence of the form 'Иди и <verb> мне <noun>а!'."""
    return 'Иди и ' + verb_imp() + ' мне ' + noun_m() + 'а' + '!'


def random_sentence2():
    """Return a declarative sentence with a relative clause."""
    return (adjective_m(noun_m()) + verb_transitive(noun_number_of()) + ','
            + verb_m(adverb(), no()) + verb_inf() + '.')


def random_sentence3():
    """Return a question of the form 'Где <time> <verb> <noun>?'."""
    return 'Где ' + time() + ' ' + verb_f(noun_f()) + '?'


def random_sentence4():
    """Return a conditional sentence starting with 'Если б'."""
    return ('Если б ' + pronoun() + ' был ' + noun_m() + ', то '
            + verb_m(adverb(), no()) + ' бы ' + verb_inf() + '.')


def random_text():
    """Return one sentence from a randomly chosen builder.

    Only the chosen builder runs, so its word files are the only ones read.
    """
    builder = random.choice([random_sentence1, random_sentence2,
                             random_sentence3, random_sentence4])
    return builder()
print("---- FASCINATING MASTERPIECE STARTS HERE ----")
num_of_sents = 5
for i in range(num_of_sents):
sentence = random_text()
sentence = sentence.capitalize()
print(sentence, end=' ')
print("\n---------AND ENDS HERE ---------")
# Print the letters of a word in reverse order, one per line, skipping
# the letters 'з' and 'я'.
print('Введите слово')
word = input()
for letter in reversed(word):
    if letter not in ('з', 'я'):
        print(letter)
import re
import os
def text_read():
    """Return the contents of an ``.xml`` file found below the current dir.

    When several files match, the text of the last one visited by
    ``os.walk`` is returned; when none match, an empty string is returned.
    """
    text = ''
    for root, dirs, files in os.walk('.'):
        for f in files:
            if f.endswith('.xml'):
                # Join with root so files in sub-directories can be opened.
                with open(os.path.join(root, f), 'r', encoding='utf - 8') as source:
                    text = source.read()
    return text
def count():
    """Return the ratio of two match counts taken over ``text_read()``.

    NOTE(review): ``reg2`` is never assigned in this function, so the
    division raises NameError as written -- a line (and probably the markup
    inside the ``reg1`` pattern) appears to have been lost; restore it.
    """
    text = text_read()
    reg1 = re.findall(r'.*', text)
    num = len(reg1)/len(reg2)
    return num
print(count())
def part_of_speech():
    """Return a ``{tag: count}`` dictionary of ``gr="..."`` tags.

    The tag is the run of capital ASCII letters that follows ``gr="`` in
    the text returned by ``text_read()``; keys appear in first-seen order.
    """
    from collections import Counter

    text = text_read()
    return dict(Counter(re.findall(r'gr="([A-Z]*)', text)))
print(part_of_speech())
def write_in():
with open('Test1.txt', 'w', encoding = 'utf-8') as f:
d = part_of_speech()
template = '{}{:>10}'
for key in sorted(d):
f.write((template.format(key, d[key]))+ '\n')
return
def write():
with open('Test1.txt', 'w', encoding = 'utf-8') as f:
d = part_of_speech()
for key in sorted(d):
f.write(key+'\t'+str(d[key])+ '\n')
return
write()
import re
def open_text(path='Programming.txt'):
    """Return the lower-cased words of the file at ``path``.

    The text is split on whitespace and each token has leading and trailing
    ``,.?!-`` removed (a token made only of those characters becomes '').
    """
    with open(path, 'r', encoding='utf - 8') as f:
        text = f.read()
    return [word.strip(',.?!-') for word in text.lower().split()]
def prog():
    """Return the distinct forms of 'программировать' found by ``open_text()``.

    Matching words are joined with ``', '`` in order of first appearance.
    """
    regex = re.compile(r'\bпрограммир(ова(ть(ся)?|нн(ым|о(е|го|му?))|вш(ая|ую|и(е|й|ми?|х)|е(й|е|му?|го))(ся)?|в|л([иа]?(сь)?)|(ся)?)|у((я(сь)?|ем(о(е|го|й|му?)|ы(е|й|х|ми?)|ая|ую)|ю(щ(ая|ую|и(е|й|х|ми?)|е(го|й|му?))(ся)?))|ют(ся)?|е((шь|т|ем)(ся)?)|ю(сь)?|ете(сь)?))\b')
    matched = [word for word in open_text() if regex.search(word)]
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return ', '.join(dict.fromkeys(matched))
print(prog())
import re
def open_s():
    """Search the saved HSE Wikipedia page and return a captured group.

    Returns None (implicitly) when the pattern does not match.

    NOTE(review): the pattern defines two groups but ``m.group(3)`` is
    requested, which raises IndexError whenever the pattern matches --
    presumably HTML markup inside the pattern was lost; restore it.
    """
    with open ('Высшая школа экономики — Википедия.html','r', encoding = 'utf - 8')as f:
        content = f.read()
    links = r'Преподаватели | \n\n (.*?)(.*?) '
    m = re.search(links,content)
    if m != None:
        return m.group(3)
def result():
    """Write the ``open_s()`` result to 'Результат.txt'.

    Returns the number of characters written.  When ``open_s()`` finds
    nothing (returns None) only the heading is written.
    """
    found = open_s()
    with open('Результат.txt', 'w', encoding='utf - 8') as file:
        return file.write('Преподаватели:' + (found or ''))
open_s()
result()
import re
def open_text(path='Leskov.txt'):
    """Return the lower-cased words of the file at ``path``.

    The text is split on whitespace and each token has leading and trailing
    ``,.”"?!-:;`` removed (a token made only of those characters becomes '').
    """
    with open(path, 'r', encoding='utf - 8') as f:
        text = f.read()
    return [word.strip(',.”"?!-:;') for word in text.lower().split()]
def words():
arr = open_text()
return len(arr)
print(words())
def frequency():
    """Return a ``{word: count}`` dictionary for the words of ``open_text()``.

    Keys appear in order of first occurrence.
    """
    from collections import Counter

    return dict(Counter(open_text()))
def result():
with open ('Результат.csv', 'w', encoding = 'utf - 8')as file:
d = frequency()
for key in sorted(d):
file.write(key + ',' + str(d[key])+ '\n')
return
result()
def phrase():
    """Find '-аго' word pairs in Leskov.txt and write them to 'Результат.txt'.

    A match is a word ending in ``аго`` followed by a space and a word
    prefix ending in ``и``, ``а`` or ``ы``.  Each match is written on its
    own line; the list of matches is returned.

    NOTE(review): the original loop header was incomplete (``for reg``);
    writing one match per line is the assumed intent.
    """
    with open('Leskov.txt', 'r', encoding='utf - 8') as f:
        text = f.read()
    reg = re.findall(r'\b\w*аго \w*(?:и|а|ы)', text)
    with open('Результат.txt', 'w', encoding='utf - 8') as file:
        for match in reg:
            file.write(match + '\n')
    return reg
phrase()
def puzzle():
phrase = {'незванный':'гость','розовый':'слон','вишнёвый':'сад', 'сиреневый':'туман', 'кленовый':'лист'}
for key in phrase:
for i in range(len(key)):
print(key + '...')
w = input('Я загадал слово ')
if w == phrase[key]:
return print('Ты выиграл')
else:
print ('Ты проиграл')
return
puzzle()
def open_text(path='Книга1.csv'):
    """Return a dictionary built from the first two lines of ``path``.

    Both lines are split on ``;``; items of the first line become keys and
    items of the second line the matching values.  The trailing newline is
    removed from both lines, so the last value compares equal to typed input.
    """
    with open(path, 'r', encoding='utf - 8') as f:
        keys = f.readline().rstrip('\n').split(';')
        values = f.readline().rstrip('\n').split(';')
    return dict(zip(keys, values))
def puzzle():
phrase = open_text()
for key in phrase:
for i in range(len(key)):
print(key + '...')
w = input('Я загадал слово ')
if w == phrase[key]:
return print('Ты выиграл')
else:
print ('Ты проиграл')
return
puzzle()
import re
def open_s():
with open ('Лингвистика — Википедия.html','r', encoding = 'utf - 8')as f:
text = f.read()
return text
def lang_meat(text=None):
    """Replace every form of 'язык' with the matching form of 'шашлык'.

    ``text`` defaults to the page returned by ``open_s()``.  Lower-case and
    capitalised forms are handled separately and the case ending is kept.
    """
    if text is None:
        text = open_s()
    # No ``\b`` in the replacement: there it denotes a backspace character,
    # not a word boundary.
    step = re.sub(r'\bязык(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'шашлык\1', text,
                  flags=re.DOTALL)
    step2 = re.sub(r'\bЯзык(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'Шашлык\1', step,
                   flags=re.DOTALL)
    return step2
def result():
with open ('Результат.txt', 'w', encoding = 'utf - 8')as file:
result = lang_meat()
return file.write(result)
result()
# Ratio of three-letter words to one-letter words in 'Капибара.txt'.
# A token may carry one trailing punctuation mark, which is not counted
# as part of the word.
marks = ',.:;!?'
first = 0
second = 0
with open("Капибара.txt", "r", encoding="utf-8") as f:
    for line in f:
        arr = line.split()
        for i in arr:
            ends_with_mark = i[-1] in marks
            if len(i) == 3 and not ends_with_mark:
                first += 1
            if len(i) == 4 and ends_with_mark:
                first += 1
            if len(i) == 1 and i != '―':
                second += 1
            # Test the length together with the whole punctuation check; a
            # bare and/or chain would count any token ending in a mark.
            if len(i) == 2 and ends_with_mark:
                second += 1
if second == 0:
    print('Слов длины один нет')
else:
    num = first / second
    print(num)
import re
import os
def folder():
    """Print the count of, and return, current-dir folders with Cyrillic names.

    An entry qualifies when its name contains at least one Cyrillic letter
    and it is a directory.
    """
    cyrillic = re.compile(r'[а-яёЁА-Я]+')
    found = []
    for entry in os.listdir('.'):
        if cyrillic.search(entry) and os.path.isdir(entry):
            found.append(entry)
    print(len(found))
    return found
folder()
def print_result():
    """Return the distinct names in the current directory, space-separated.

    File names are listed without their extension, folder names as they
    are; a name appears once even when several entries share it.
    """
    names = []
    for f in os.listdir('.'):
        if os.path.isfile(f):
            # splitext leaves extension-less names intact (rfind would
            # return -1 and chop the last character).
            f = os.path.splitext(f)[0]
        names.append(f)
    return ' '.join(dict.fromkeys(names))
print(print_result())
import os
import re
def text():
    """Write a per-file match count for every ``.xhtml`` file to Exam.txt.

    Files are searched below the current directory; each output line is
    ``<file name>\\t<count>``.  The output file is opened once so earlier
    lines are not overwritten.

    NOTE(review): the pattern is an empty string in this source, so the
    count is ``len(content) + 1`` -- presumably the markup was lost.
    """
    with open('Exam.txt', 'w', encoding='utf-8') as f2:
        for root, dirs, files in os.walk('.'):
            for f in files:
                if not f.endswith('.xhtml'):
                    continue
                with open(os.path.join(root, f), 'r') as source:
                    content = source.read()
                reg = re.findall(r'', content)
                f2.write(f + '\t' + str(len(reg)) + '\n')
text()
def table():
    """Write ``file,word,number,`` rows for current-dir files to Результат.csv.

    For every regular file, the words matched by the first pattern are
    paired in order with the numbers matched by the second one.  The
    output file is opened once and is itself skipped as an input.

    NOTE(review): ``([0-9]*)`` also matches empty strings; the patterns
    look like remnants of lost markup -- confirm them.
    """
    output_name = 'Результат.csv'
    with open(output_name, 'w', encoding='utf - 8') as file:
        for f in os.listdir('.'):
            if f == output_name or not os.path.isfile(f):
                continue
            with open(f, 'r') as source:
                content = source.read()
            reg1 = re.findall(r' ([А-Яа-яёЁ]*)\.', content)
            reg2 = re.findall(r'([0-9]*)', content)
            for i, j in zip(reg1, reg2):
                file.write(f + ',' + i + ',' + j + ',' + '\n')
table()
# For each line of 'Цитаты.txt': print the part before the dash when it has
# fewer than ten words, and count occurrences of the word 'разум'.
n = 0
with open("Цитаты.txt", "r", encoding="utf-8") as f:
    for line in f:
        arr = line.split('—')
        arr2 = arr[0].split()
        if len(arr2) < 10:
            print(arr[0])
        arr3 = line.split()
        n += arr3.count('разум')
print(n)
import re
def open_s():
with open ('Капибара — Википедия.html','r', encoding = 'utf - 8')as f:
text = f.read()
reg = r'(.*?)'
m = re.findall(reg,text)
return m
print (open_s())
# NOTE(review): ``links`` was never assigned; taking it from open_s() is the
# assumed intent.  Indexing link[2] needs a pattern with three groups,
# which the pattern visible in open_s() does not have -- confirm.
links = open_s()
for link in links[:10]:
    print(link[0])
for link in links[:10]:
    print(link[2], '-->', link[1])
import os
def delete(dirname):
    """Remove the directory ``dirname`` together with everything inside it.

    A missing directory is silently ignored.
    """
    # Walk bottom-up so every directory is already empty when it is removed.
    for root, dirs, files in os.walk(dirname, topdown=False):
        for f in files:
            os.remove(os.path.join(root, f))
        for d in dirs:
            path = os.path.join(root, d)
            # Links to directories are not walked into; drop the link itself.
            if os.path.islink(path):
                os.remove(path)
        os.rmdir(root)
delete('кот')
def print_tree(dirname, space=0):
    """Print the directory tree below ``dirname``.

    Each directory path is printed indented by ``space`` plus two spaces
    per nesting level; its files follow, indented two spaces further.

    NOTE(review): the original print statements were malformed; this
    layout is the assumed intent.
    """
    for root, dirs, files in os.walk(dirname):
        rel = os.path.relpath(root, dirname)
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = ' ' * (space + 2 * depth)
        print(indent + root)
        for i in files:
            print(indent + '  ' + i)
import os
def task_0():
print(os.listdir('.'))
task_0()
def task_1():
    """Create nested folders named after the words of a typed sentence.

    The first word is the outermost folder.  Empty input creates nothing.
    """
    sent = input('Введите предложение:')
    arr = sent.split()
    if not arr:
        return
    # os.path.join uses the separator of the current platform.
    os.makedirs(os.path.join(*arr))
task_1()
def task_2():
    """Create nested folders named ``0``, ``1``, ... ``n - 1`` for a typed n.

    Folder ``0`` is the outermost.  Nothing is created when n is not positive.
    """
    n = int(input())
    if n <= 0:
        return
    os.makedirs(os.path.join(*(str(i) for i in range(n))))
task_2()
def count_tf(word, text):
    """Return the term frequency of ``word`` in ``text``.

    This is the number of occurrences divided by ``len(text)``; an empty
    ``text`` yields 0 instead of raising ZeroDivisionError.
    """
    if not text:
        return 0
    return text.count(word) / len(text)
def count_df(word, texts):
    """Return the number of items in ``texts`` that contain ``word``."""
    return sum(1 for text in texts if word in text)
def count_idf(word, texts):
    """Return ``len(texts)`` divided by one plus the document frequency.

    The logarithm is not applied here.
    """
    documents_with_word = count_df(word, texts)
    return len(texts) / (1 + documents_with_word)
from math import log
def count_tfidf(word, text, texts):
    """Return the TF-IDF score of ``word`` for ``text`` within ``texts``.

    The score is the term frequency multiplied by the base-10 logarithm of
    the ratio returned by ``count_idf``.  A higher score means the word is
    more characteristic of ``text``, so callers ranking words should sort
    in descending order.  A word absent from ``text`` or an empty ``texts``
    scores 0.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    # log is undefined at 0; tf is used linearly, never under a logarithm.
    if tf == 0 or idf <= 0:
        return 0
    return tf * log(idf, 10)
import re
# NOTE(review): this literal was cut off in the source after ``%``; the
# character class is closed here so the module parses -- restore any
# characters that were lost.
punct = r'[.,!«»?&@"$\[\]\(\):;%]'


def preprocessing(text):
    """Return the informative words of ``text``.

    The text is lower-cased, characters from ``punct`` are removed, and the
    whitespace-separated tokens longer than four characters that contain
    no digit are kept.
    """
    text_wo_punct = re.sub(punct, '', text.lower())
    word = text_wo_punct.strip().split()
    # \d also rejects tokens whose only digit is 0.
    words = [i for i in word if len(i) > 4 and re.search(r'\d', i) is None]
    return words
import os
texts_dic = {}
for root, dirs, files in os.walk('wikipedia'):
for f in files[:50]:
with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
text = preprocessing(t.read())
texts_dic[f.split('.')[0]] = text
texts = list(texts_dic.values())
for text in texts_dic:
print("Top words in document {}".format(text))
scores = {}
for word in texts_dic[text]:
scores[word] = count_tfidf(word, texts_dic[text], texts)
sorted_words = sorted(scores.items(), key=lambda x: x[1])
for word, score in sorted_words[:5]:
print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
def open_text(path='text.txt'):
    """Return the lower-cased words of the file at ``path``.

    The text is split on whitespace and each token has leading and trailing
    ``,.?!-`` removed (a token made only of those characters becomes '').
    """
    with open(path, 'r', encoding='utf - 8') as f:
        text = f.read()
    return [word.strip(',.?!-') for word in text.lower().split()]
def first_letter(letter):
    """Print every word from ``open_text()`` whose first character is ``letter``.

    Returns the last word of the text (None when the text has no words),
    as the original did.
    """
    last = None
    for last in open_text():
        # Slicing tolerates the empty tokens left by punctuation stripping.
        if last[:1] == letter:
            print(last)
    return last
letter = input('Введите букву')
def c():
    """Ask for a country and return its capital, or ``'NO'`` if unknown."""
    cont = input('Введите страну ')
    d = {'Россия': 'Москва', 'Германия': 'Берлин', 'Италия': 'Рим',
         'Франция': 'Париж', 'Азербайджан': 'Баку'}
    return d.get(cont, 'NO')
def change():
    """Return the country-to-capital table inverted to capital-to-country."""
    d = {'Россия': 'Москва', 'Германия': 'Берлин', 'Италия': 'Рим',
         'Франция': 'Париж', 'Азербайджан': 'Баку'}
    return {city: country for country, city in d.items()}
def delete_doubles():
    """Return the sample phone book with repeated numbers removed.

    For each number only the first name listed with it is kept.
    """
    d = {'Петя': 12345, 'Пётр': 12345, 'Аня': 54321, 'Анна': 54321,
         'Сёма': 13579}
    seen = set()
    d1 = {}
    for key, value in d.items():
        if value in seen:
            continue
        # Remember the value so later entries with the same one are dropped.
        seen.add(value)
        d1[key] = value
    return d1
print(delete_doubles())
import re
def open_text(path='Жирафики.txt'):
    """Return the lower-cased words of the file at ``path``.

    The text is split on whitespace and each token has leading and trailing
    ``,.?!-`` removed (a token made only of those characters becomes '').
    """
    with open(path, 'r', encoding='utf - 8') as f:
        text = f.read()
    return [word.strip(',.?!-') for word in text.lower().split()]
def giraf():
s = input('Введите что-нибудь ')
regex = 'жираф(а(ми?|х)?|у|е|о[мв]|ами|ы)?'
m = re.search(regex,s)
if m != None:
return 'Я нашёл'
print(giraf())
def giraf_in_text():
    """Return how many words from ``open_text()`` are forms of 'жираф'."""
    regex = re.compile(r'\bжираф(а(ми?|х)?|у|е|о[мв]|ами|ы)?\b')
    # The pattern is tested against each word, not once outside the loop.
    return sum(1 for word in open_text() if regex.search(word))
print(giraf_in_text())
import re
def open_s():
with open ('Динозавры — Википедия.html','r', encoding = 'utf - 8')as f:
text = f.read()
return text
def find_dino():
text = open_s()
reg = r'\b[Дд]инозавр[а-я]{0,5}'
m = re.findall(reg, text)
return m
print (find_dino())
def no_html():
text = open_s()
m = re.sub(u'<.*?>', u'', text, flags = re.DOTALL)
return m
print (no_html())
def cat_dino(text=None):
    """Replace the stem 'динозавр' with 'кот' at the start of words.

    ``text`` defaults to the tag-free page returned by ``no_html()``.
    Lower-case and capitalised stems are handled separately; word endings
    are left in place.
    """
    if text is None:
        text = no_html()
    # No ``\b`` in the replacement: there it denotes a backspace character,
    # not a word boundary.
    n = re.sub(r'\bдинозавр', 'кот', text, flags=re.DOTALL)
    n1 = re.sub(r'\bДинозавр', 'Кот', n, flags=re.DOTALL)
    return n1
print(cat_dino())
import re
def open_text(path='Гоголь.txt'):
    """Return the sentences of the file at ``path`` with punctuation blanked.

    Newlines become spaces; the text is split on ``.``, ``?`` and ``! ``
    (exclamation mark followed by a space); inside every part each of the
    characters ``:;,.?!—`` and ``-`` is replaced by a space.
    """
    with open(path, 'r', encoding='utf - 8') as f:
        text = f.read()
    text = re.sub(r'\n', ' ', text)
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    arr = re.split(r'\.|\?|\! ', text)
    return [re.sub(r'[:;,.?!— -]', ' ', part) for part in arr]
def words_5():
    """Print ``<word>_<length>`` for every word of every sentence.

    Sentences come from ``open_text()``; words are whitespace-separated.
    """
    template = '{}_{}'
    for sentence in open_text():
        for word in sentence.split():
            print(template.format(word, len(word)))
words_5()
import re
def open_text(path='Гоголь.txt'):
    """Return the sentences of the file at ``path`` with punctuation blanked.

    Newlines become spaces; the text is split on ``.``, ``?`` and ``! ``
    (exclamation mark followed by a space); inside every part each of the
    characters ``:;,.?!—`` and ``-`` is replaced by a space.
    """
    with open(path, 'r', encoding='utf - 8') as f:
        text = f.read()
    text = re.sub(r'\n', ' ', text)
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    arr = re.split(r'\.|\?|\! ', text)
    return [re.sub(r'[:;,.?!— -]', ' ', part) for part in arr]
def words_5():
    """Print ``<word>_<length>`` for every word of every sentence.

    Sentences come from ``open_text()``; words are whitespace-separated.
    A plain loop is used because the printing is a side effect, not a
    value to collect.
    """
    template = '{}_{}'
    for sentence in open_text():
        for word in sentence.split():
            print(template.format(word, len(word)))
words_5()
def open_text(path='Austen_Jane.txt'):
    """Return the lower-cased words of the file at ``path``.

    The text is split on whitespace and each token has leading and trailing
    ``,.”"?!-:;`` removed (a token made only of those characters becomes '').
    """
    with open(path, 'r', encoding='utf - 8') as f:
        text = f.read()
    return [word.strip(',.”"?!-:;') for word in text.lower().split()]
def words():
    """Return the words from ``open_text()`` that end in ``hood``."""
    return [word for word in open_text() if word.endswith('hood')]
def number_of_words():
arr1 = words()
return len(arr1)
def the_minimum_frequency():
    """Return the least frequent ``-hood`` word.

    Among the words returned by ``words()``, the one with the smallest
    number of occurrences is chosen; on a tie the one occurring last in
    the text wins.  Returns None when there are no such words.
    """
    from collections import Counter

    arr1 = words()
    if not arr1:
        return None
    counts = Counter(arr1)
    # Use the real minimum; a starting value of 1 would pin it to 1.
    lowest = min(counts.values())
    for word in reversed(arr1):
        if counts[word] == lowest:
            return word
def base():
    """Return every word from ``words()`` cut before its last ``h``.

    For words ending in ``hood`` this removes that suffix.
    """
    return [word[:word.rfind('h')] for word in words()]
print(number_of_words())
print(the_minimum_frequency())
print(' '.join(map(str,(base()))))
arr = []
word = input('Введите слово')
while word:
arr.append (word)
word = input( 'Введите слово')
for w in range(len(arr)):
print(arr[w][w+1::])
word=input("Введите слово: ")
n=[]
for i in range(len(word)):
n=word[len(word)-i:]
n+=word[i:]
print(n)
print ("Введите число")
num=int(input())
print ("Введите слово")
w=input()
while w!= "программирование" and w!= "программирование":
for i in range(num):
print (w)
print ("Введите число")
num=int(input())
print ("Введите слово")
w=input()
print ("Конец")
w = []
while True:
word =(input('Введите латинское слово: '))
if len (word) ==0: break
elif word[-2:]== 're' or word [-2:]=='ri':
w.append(word)
for i in range (len(w)):
print (w[i])
def open_text(name):
    """Return the lower-cased words of the UTF-8 file ``name + '.txt'``.

    The text is split on any whitespace, so a word at the end of a line is
    not glued to the first word of the next one; each token has leading
    and trailing ``.,!?-`` removed.
    """
    with open(name + '.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    return [word.strip('.,!?-') for word in text.lower().split()]
def edwords(a):
    """Print the number of words in ``a`` ending in ``ed`` and return them."""
    ed = [word for word in a if word.endswith('ed')]
    print('Количество форм на -ed равно', str(len(ed)))
    return ed
def iedwords(b):
    """Print the number of words in ``b`` ending in ``ied``.

    Always returns an empty tuple.
    """
    iedlist = sum(1 for word in b if word.endswith('ied'))
    print('Количество форм, образованных от глаголов на -у или -е равно',
          str(iedlist))
    return ()
def end():
name=input('Введите название файла: ')
a=open_text(name)
b=edwords(a)
c=iedwords(b)
return (c)
u=end()
with open("text.txt", "r", encoding="utf-8") as f:
text=f.read()
words=text.split(' ')
words_num=len(words)
letters=list(text)
marks_num=int()
for i in letters:
if i=="." or i==",":
marks_num+=1
percent=marks_num/words_num*100
print('Процент слов, имеющих знак препинания: ', round(percent))
import random
def bigram(path='text.csv'):
    """Return a dictionary built from the comma-separated rows of ``path``.

    The first column is the key and the second the value, without the
    trailing newline.  Rows with fewer than two columns are skipped.
    """
    b = {}
    with open(path, 'r') as f:
        for line in f:
            parts = line.rstrip('\n').split(',')
            if len(parts) < 2:
                continue
            b[parts[0]] = parts[1]
    return b
def dots(w):
    """Return ``'. '`` repeated once for every character of ``w``."""
    return '. ' * len(w)
def rand(b):
    """Return a random key of the dictionary ``b``.

    Raises IndexError when ``b`` is empty.
    """
    return random.choice(list(b))
print ('Сейчас мы сыграем в игру "Угадай слово"!')
big=bigram()
word=rand(big)
print ("Подсказка:")
print (big[word]+' '+ dots(big[word]))
answer=input('Как вы думаете, что это за слово? ')
if answer==word:
print ("Правильно!")
else:
print ('Увы, неправильно!')
import os
import re
def sents():
news = 'news'
sent = {}
for n in os.listdir(news):
with open(os.path.join(news, n), encoding='cp1251') as text:
text = text.read()
sent[n] = len(re.findall('', text))
return (sent)
def new (sent):
with open('new_file', 'w', encoding = 'utf-8') as new:
for s in sent:
new.write(s+'\t'+str(sent[s])+'\n')
new(sents())
import re
def oh():
    """Count leading lines of text.txt and write the count to endtext.txt.

    Lines are counted until the first one containing the marker string.

    NOTE(review): the marker is an empty string in this source, and
    ``'' in line`` is always true, so the loop stops on the first line and
    0 is written -- presumably the marker text was lost; restore it.
    """
    lines=int()
    with open ('text.txt', 'r', encoding='utf-8') as first:
        old_text=first.readlines()
    for line in old_text:
        if '' in line:
            break
        else:
            lines+=1
    with open ('endtext.txt', 'w', encoding='utf-8') as second:
        lines1=str(lines)
        second.write(lines1)
def oops():
slov={}
with open ('text.txt', 'r', encoding='utf-8') as first:
old_text=first.readlines()
for line in old_text:
if " 2) and (usl[2] == 'ед') and (usl[3] == 'жен'):
mass.append(arr[0])
sum += float(arr[2])
print(', '.join(mass))
print('Сумма ipm = ', sum)
n=input('Введите любое число. ')
n=int(n)
while n != 0 :
sl=input('Введите любое слово. ')
if sl == 'программирование':
break
print(sl)
n=n-1
word = input("Введите слово: ")
for k in range(len(word)):
newword = (word[-k: ] + word[ :-k])
print(newword)
import re
import os
def sent_count():
path = './news/'
for root, dirs, files in os.walk(path):
for f in files:
with open(os.path.join(root, f), 'r', encoding = 'cp1251') as t:
text = t.read()
mass = []
mass = text.split('\n')
s_count = 0
for i in mass:
if re.search('', i):
s_count += 1
with open('result.txt', 'a', encoding = 'utf-8') as file:
file.write(f +'\t' + str(s_count) + '\n')
def write_csv():
with open('result.csv', 'w', encoding = 'utf-8') as file:
output = csv.writer(file, delimiter = ',')
head = ['Название файла', 'Автор', 'Тематика текста']
path = './news/'
for root, dirs, files in os.walk(path):
for f in files:
with open(os.path.join(root, f), 'r', encoding = 'utf-8') as t:
text = t.read()
if re.search('', text):
auth = re.search('', text).group(1)
def main():
sent_count()
main()
s = 8
p = input ("введите число")
p = int (p)
while p!=s:
if p < s:
print ("больше")
else:
print ("меньше")
p = input ("ещё раз")
if len (p) == 0:
print ("всё")
break
p = int (p)
if p==s:
print ("вы выиграли")
print (range (10))
import os
# For every file in the news folder, count the lines containing '. ' and
# append '<file name> <count>' to sent.txt.
# Raw string: in a plain literal '\n' inside the path is a newline.
direct = r'D:\Downloads\news.zip\news'
files = os.listdir(direct)
for file in files:
    sent = 0
    # listdir returns bare names; join them with the folder before opening.
    with open(os.path.join(direct, file), 'r') as f:
        for line in f:
            if '. ' in line:
                sent += 1
    with open('sent.txt', 'a') as f:
        # write() takes a single string.
        f.write(file + ' ' + str(sent) + '\n')
A = [0] * 7
for i in range(7):
A[i] = int(input("введите число"))
for i in range(7):
B = ["X"] * A[i]
if A[i] < 0:
print ("введено отрицательное число")
else:
print (''.join([str(i) for i in B]))
s = input ("введите число")
s = int (s)
for i in range (10):
i += 1
p = i*s
print (i, "*", s, "=", p)
# Percentage of words in text.txt that start with a capital letter.
words = 0
cap_words = 0
with open('text.txt', 'r', encoding='utf-8') as source:
    for line in source:
        # Iterate over the words of the line, not over the file again.
        for word in line.split():
            words += 1
            if word.istitle():
                cap_words += 1
s = (cap_words / words) * 100 if words else 0
print("слов, начинающихся с заглавной буквы", s, "%")
# Read integers a, b, c and report whether a % b == c and whether a / b == c.
a = int(input("введите a"))
b = int(input("введите b"))
c = int(input("введите c"))
# Both operations raise ZeroDivisionError for b == 0; treat that as "no".
if b != 0 and c == a % b:
    print("a даёт остаток c при делении на b")
else:
    print("a НЕ даёт остаток c при делении на b")
if b != 0 and c == a / b:
    print("a разделить на b равно c")
else:
    print("a разделить на b НЕ равно c")
def linecount(text):
    """Return the number of items produced by iterating over ``text``."""
    return sum(1 for _ in text)
def freq (text):
A = dict()
for line in text:
if " 3):
dic[element] += 1
elif len(element)> 3:
dic[element] = 1
return dic
def delete(dic):
    """Remove, in place, every entry of ``dic`` whose value is 1.

    The same (mutated) dictionary is returned.
    """
    # Iterate over a snapshot of the keys so deleting is safe.
    for word in list(dic):
        if dic[word] == 1:
            del dic[word]
    return dic
corpus_freq = freq_dict(words)
anek_freq = freq_dict(words_anek)
izvest_freq = freq_dict(words_izvest)
teh_freq = freq_dict(words_teh)
delete (corpus_freq)
delete(anek_freq)
delete(izvest_freq)
delete(teh_freq)
def pmi_for_cats(x, y):
    """Return the pointwise mutual information of word ``x`` and category ``y``.

    ``y`` is one of ``'anek'``, ``'teh'`` or ``'izvest'``.  Raises KeyError
    when ``x`` is missing from the frequency table of the category (callers
    rely on this) and ValueError for an unknown category.

    NOTE(review): the joint probability divides by the size of the *other*
    two categories' word lists -- confirm that this is intended.
    """
    if y == 'anek':
        dic = anek_freq
        arr = words_teh + words_izvest
        num = num_anek
    elif y == 'teh':
        dic = teh_freq
        arr = words_anek + words_izvest
        num = num_teh
    elif y == 'izvest':
        dic = izvest_freq
        arr = words_teh + words_anek
        num = num_izvest
    else:
        raise ValueError('unknown category: {!r}'.format(y))
    p_xy = dic[x] / len(arr)
    p_x = corpus_freq[x] / len(words)
    p_y = num / (num_izvest + num_teh + num_anek)
    return log(p_xy / (p_x * p_y))
cat_pmi = {}
i = 0
for word in corpus_freq:
if i > 100:
break
try:
pmi_anek = pmi_for_cats(word, 'anek')
except KeyError:
pmi_anek = 0
try:
pmi_teh = pmi_for_cats(word, 'teh')
except KeyError:
pmi_teh = 0
try:
pmi_izvest = pmi_for_cats(word, 'izvest')
except KeyError:
pmi_izvest = 0
max_pmi = max(pmi_anek, pmi_teh, pmi_izvest)
if max_pmi == 0:
continue
if max_pmi == pmi_anek:
cat = 'anek'
elif max_pmi == pmi_teh:
cat = 'teh'
elif max_pmi == pmi_izvest:
cat = 'izvest'
print(word, cat)
i += 1
word = input ("Введите слово: ")
a = 0
b = len (word)
while word [a:b] != "":
print (word [a:b])
a += 1
b -=1
# Read eight words and print them glued together in consecutive pairs.
A = []
print('Enter 8 words')
# Exactly eight inputs, matching the prompt.
for _ in range(8):
    A.append(input())
for i in range(1, 8, 2):
    print(A[i - 1] + A[i])
# Read integers a, b, c and report whether a / b == c and whether a ** b == c.
a = int(input("Введите число a: "))
b = int(input("Введите число b: "))
c = int(input("Введите число c: "))
# Division by zero cannot equal c.
if b != 0 and a / b == c:
    print("a / b = c")
else:
    print("a / b != c")
try:
    power_matches = a ** b == c
except ZeroDivisionError:
    # 0 raised to a negative power is undefined.
    power_matches = False
if power_matches:
    print("a ^ b = c")
else:
    print("a ^ b != c")
def questions():
file = input('Введите название файла на английском: ')
leng = int(input('Введите длину слова: '))
quant = open_file(file)
output = perc(quant, leng)
return output
def open_file(file):
    """Return the whitespace-separated words of the text file ``file``."""
    with open(file, 'r') as f:
        return f.read().split()
def perc(quant, leng):
    """Return the percentage of ``un``-words longer than ``leng``.

    ``quant`` is an iterable of words.  The number of words starting with
    ``un`` is printed and the rounded percentage of them whose length
    exceeds ``leng`` is returned.  When no word starts with ``un`` a
    message string is returned instead.

    NOTE(review): the length test is read as applying to ``un``-words
    only; the flattened source does not show the nesting -- confirm.
    """
    un_words = [item for item in quant if item.startswith('un')]
    if not un_words:
        return 'В тексте нет слов, начинающихся на un-'
    longer = sum(1 for item in un_words if len(item) > leng)
    print('Количество слов, начинающихся с un-, в тексте: ', len(un_words))
    return round(longer / len(un_words) * 100)
print('Проценты: ', questions())
import random
def read():
    """Return the lines of text.txt as a list (newlines kept)."""
    # The with-block closes the file; this function runs once per word.
    with open('text.txt', 'r') as f:
        return f.readlines()
def array (numb):
a = read()[numb].split()
return a
def noun2 ():
return random.choice(array(0))
def noun3 ():
return random.choice(array(1))
def noun4 ():
return random.choice(array(2))
def imper2 ():
return random.choice (array(3))
def imper3 ():
return random.choice(array(4))
def imper4 ():
return random.choice(array(5))
def verb2 ():
return random.choice(array(6))
def verb3 ():
return random.choice(array(7))
def verb4 ():
return random.choice(array(8))
def adverb1 ():
return random.choice (array(9))
def adverb2 ():
return random.choice (array(10))
def adverb3 ():
return random.choice (array(11))
def adverb4 ():
return random.choice (array(12))
def punct():
marks = [".", "?", "!", "..."]
return random.choice(marks)
def verse_5_1 ():
return imper3() + ' ' + noun2() + punct()
def verse_5_2 ():
return imper2() + ' ' + noun3() + punct()
def verse_5_3 ():
return verb2() + ' ' + noun3() + punct()
def verse_5_4 ():
return verb3() + ' ' + noun2() + punct()
def verse_5_5 ():
return adverb1() + ' ' + verb2() + ' ' + noun2() + punct()
def verse_5_6 ():
return adverb1() + ' ' + imper4() + punct()
def verse_5_7 ():
return adverb2() + ' ' + imper3() + punct()
def verse_5_8 ():
return adverb3() + ' ' + imper2() + punct()
def verse_7_1 ():
return imper3() + ' ' + noun4() + punct()
def verse_7_2 ():
return imper4() + ' ' + noun3() + punct()
def verse_7_3 ():
return verb3() + ' ' + noun4() + punct()
def verse_7_4 ():
return verb4() + ' ' + noun3() + punct()
def verse_7_5 ():
return adverb1() + ' ' + verb3() + ' ' + noun3() + punct()
def verse_7_6 ():
return adverb1() + ' ' + verb4() + ' ' + noun2() + punct()
def verse_7_7 ():
return adverb1() + ' ' + verb2() + ' ' + noun4() + punct()
def verse_7_8 ():
return adverb2() + ' ' + verb2() + ' ' + noun3() + punct()
def verse_7_9 ():
return adverb2() + ' ' + verb3() + ' ' + noun2() + punct()
def make_verse_5():
    """Return a line from one of the eight ``verse_5_*`` builders, at random."""
    builders = (verse_5_1, verse_5_2, verse_5_3, verse_5_4,
                verse_5_5, verse_5_6, verse_5_7, verse_5_8)
    return random.choice(builders)()
def make_verse_7():
    """Return a line from one of the nine ``verse_7_*`` builders, at random."""
    builders = (verse_7_1, verse_7_2, verse_7_3, verse_7_4, verse_7_5,
                verse_7_6, verse_7_7, verse_7_8, verse_7_9)
    return random.choice(builders)()
print(make_verse_5())
print(make_verse_7())
print(make_verse_5())
print(make_verse_7())
print(make_verse_7())
num = int (input ("Введите натуральное чиcло: "))
i = 1
while 2**i < num:
print (2**i)
i+=1
import re
def read_file():
    """Return the whole of corp.txt (UTF-8) as one string."""
    # The with-block closes the file; no explicit close() is needed.
    with open('corp.txt', 'r', encoding='UTF-8') as file:
        return file.read()
def counter():
    """Count lines of ``corp.txt`` up to the first line that contains the marker string.

    Returns:
        1 plus the number of lines read before the first line containing the marker.
    """
    # NOTE(review): the marker is the empty string, which is contained in every
    # line, so the scan stops at the first line. Presumably the real marker text
    # was lost from this literal - TODO confirm against the original script.
    header_lines = 1
    with open('corp.txt', 'r', encoding='UTF-8') as corpus:
        for line in corpus:
            if '' in line:
                break
            header_lines += 1
    return header_lines
def five_points():
    """Write the value returned by ``counter()`` into 'подсчет строк.txt' with a caption."""
    with open('подсчет строк.txt', 'w', encoding='utf-8') as report:
        report.write('Число строк заголовка: ' + str(counter()))
def dictionary():
    """Count how often each word that directly follows ``>`` occurs in the corpus text.

    Returns:
        dict mapping each such word (taken from ``read_file()``) to its number
        of occurrences, in order of first appearance.
    """
    frequencies = {}
    # Raw string: '\w' inside a normal literal is an invalid escape sequence.
    # The capture group drops the leading '>' that was previously stripped by hand.
    for lemma in re.findall(r'>(\w+)', read_file()):
        frequencies[lemma] = frequencies.get(lemma, 0) + 1
    return frequencies
def eight_points():
    """Write every ``word - count`` pair from ``dictionary()`` to 'словарик.txt', one per line."""
    counts = dictionary()
    with open('словарик.txt', 'w', encoding='utf-8') as out:
        for word, amount in counts.items():
            out.write(word + ' - ' + str(amount) + ' \n')
def ten_points():
    """Collect ``type`` attribute values matching ``f.h...`` from ``corp.txt``.

    Returns:
        list with, for every line that has one, the first ``type="..."`` value
        matching the pattern ``f.h.+?``, in file order.
    """
    formlist = []
    # The context manager closes the file, which the previous version never did.
    with open('corp.txt', 'r', encoding='UTF-8') as corpus:
        for line in corpus:
            pronom = re.search('type="(f.h.+?)"', line)
            if pronom is not None:
                formlist.append(pronom.group(1))
    return formlist
# Run the three corpus tasks; the first two write their results to files.
five_points()
eight_points()
print ('Загляните в папку с программой и попробуйте найти в ней новые txt-файл.')
print (ten_points())
# The import below was fused onto the end of the previous statement.
import os
def lists_creator():
    """Print file names under the hard-coded folder that contain many 'a' letters or punctuation.

    A name is listed in the first group when it has more than three letters
    from ``a_letters`` and in the second when it has more than one character
    from ``marks``.
    """
    a_letters = ('a', 'A', 'А', 'а')
    marks = ('.', ',', '?', '!', '(', ')', '-')
    many_a = []
    with_punct = []
    for _root, _dirs, names in os.walk('/home/lera/Рабочий стол/Загрузки'):
        for name in names:
            a_total = sum(1 for ch in name if ch in a_letters)
            mark_total = sum(1 for ch in name if ch in marks)
            if a_total > 3:
                many_a.append(name)
            if mark_total > 1:
                with_punct.append(name)
    print ('+++++++++++++Файлы, в которых большк 3х "а":+++++++++++++')
    for name in many_a:
        print (name)
    print ('+++++++++++++Файлы со знаками препинания в названии:+++++++++++++')
    for name in with_punct:
        print(name)
def kracuvo():
    """Print every sub-folder found under the hard-coded folder, followed by its entries."""
    base = '/home/lera/Рабочий стол/Загрузки'
    for root, dirs, _files in os.walk(base):
        for folder in dirs:
            print('--', folder)
            # Join with the directory currently being walked: building the path
            # from the base folder only works for first-level sub-folders and
            # makes os.listdir fail for nested ones.
            for entry in os.listdir(os.path.join(root, folder)):
                print (' ', entry)
print(os.path.join('дз ап', 'morozova3.docx'))
# The import below was fused onto the end of the previous statement.
import os
import re
def s_counter_5():
    """Count '.', '?' and '!' characters in every file under ``news`` and write them to ``res.txt``.

    Each output line holds the file name, a tab and the count.

    Returns:
        The ``res.txt`` file object, already closed (kept for compatibility).
    """
    # Open the report once: reopening it for every walked directory truncated
    # earlier results and leaked the previous handle.
    with open('res.txt', 'w', encoding='utf-8') as s_result:
        for root, dirs, files in os.walk('news'):
            for file in files:
                # Use the walked directory so files in sub-folders are found too.
                with open(os.path.join(root, file), 'r') as f:
                    file_text = f.read()
                q = sum(file_text.count(mark) for mark in '.?!')
                s_result.write(file + '\t' + str(q) + '\n')
    return s_result
def table_8():
    """Write one author entry per non-empty line of every file under ``news`` to ``table.csv``.

    For each line the author pattern is searched; the captured text is written,
    or 'no author' when the pattern does not match or captures nothing.
    """
    # NOTE(review): the line-selection pattern '.+' takes every non-empty line;
    # it looks like part of the original pattern may be missing - TODO confirm.
    author_pattern = r'>([a-яА-Я]+.[a-яА-Я]+)?\.'
    # The context manager closes the table, which was previously left open.
    with open('table.csv', 'w', encoding='utf-8') as table:
        for root, dirs, files in os.walk('news'):
            for file in files:
                # Use the walked directory so files in sub-folders are found too.
                with open(os.path.join(root, file), 'r') as f:
                    file_text = f.read()
                for el in re.findall('.+', file_text):
                    a = re.search(author_pattern, el)
                    # The capture group is optional, so a match may still carry
                    # no author; treat that like "not found" instead of failing
                    # on None + str.
                    author = a.group(1) if a is not None else None
                    table.write((author or 'no author') + '\n')
# Run both tasks. The first one was only named, never called, so res.txt was never written.
s_counter_5()
table_8()
import re
def file_name():
    """Print the instructions and return the file name typed by the user."""
    print ('Поместите файл в одну папку с данной программой.\nВведите имя файла, чтобы получить список словоформ:')
    return input()
def read_file():
    """Return the lower-cased, punctuation-stripped words of the file named by ``file_name()``."""
    with open(file_name(), 'r', encoding='UTF-8') as source:
        return [
            token.lower().strip('.,:;"«»-?()!')
            for line in source
            for token in line.split()
        ]
def form_finder():
    """Return every word from ``read_file()`` in which the verb-form pattern is found."""
    pattern = '(не(до)?|под)?вып[еиь]([йтлеюи]|(вш))[мшьаоиыуе]?(го|м(у|и)?[ейяюх])?(ся)?'
    return [word for word in read_file() if re.search(pattern, word) is not None]
def list_without_repetitions():
    """Return the words from ``form_finder()`` with duplicates removed.

    The first occurrence of every word is kept and the original order is
    preserved.
    """
    # dict.fromkeys keeps insertion order. The previous nested loop removed
    # items from the list while iterating over it, which dropped words
    # altogether (a single-occurrence word was removed as its own duplicate).
    return list(dict.fromkeys(form_finder()))
# Print every distinct word form on its own line.
for el in list_without_repetitions():
    print(el)
# The prompt below was fused onto the end of the previous statement.
print ('Введите число')
# Ask for up to n words, echoing each, and stop early on the sentinel word.
n = int(input())
for i in range(n):
    print('Введите слово')
    a = input()
    print ('Ваше слово:', a)
    if a == 'программирование':
        break
print ('Цикл завершен')

# Print every word of wordlist.txt lower-cased and stripped of punctuation.
# The open() call was fused onto the previous print; the context manager also
# closes the file, which was never done before.
with open('wordlist.txt', 'r', encoding='utf-8') as f:
    for line in f:
        arr = [word.strip('.,?!;:-"') for word in line.split()]
        for el in arr:
            el = el.lower()
            print (el)
import random
def open_file():
    """Return the lines of ``wordlist.txt`` (UTF-8), line endings included."""
    with open('wordlist.txt', 'r', encoding='UTF-8') as source:
        return source.readlines()
def random_word(lines):
    """Return a randomly chosen item of ``lines`` with newline characters stripped from both ends."""
    return random.choice(lines).strip('\n')
def syllable_counter(word):
    """Return how many characters of ``word`` are in the fixed set of lower-case vowel letters."""
    vowels = ('e', 'y', 'u', 'i', 'o', 'a', 'é', 'è', 'ê', 'à', 'â', 'ù', 'û', 'ô', 'î')
    return sum(1 for letter in word if letter in vowels)
def line_creator(syl_number):
    """Print a random phrase whose words add up to exactly ``syl_number`` syllables.

    Words are drawn at random from the word list; when the running total
    overshoots, the phrase is discarded and built again from scratch. The
    phrase is printed capitalised and followed by a random '!', '.' or '?'.

    Args:
        syl_number: required number of syllables, as counted by
            ``syllable_counter``.
    """
    # Read the word list once instead of re-opening the file for every draw.
    words = open_file()
    syl_max = syl_number
    line = ''
    while syl_max >= 0:
        word = random_word(words)
        syl_max -= syllable_counter(word)
        if syl_max < 0:
            # Overshot: start the phrase over.
            line = ''
            syl_max = syl_number
            continue
        line = line + ' ' + word
        if syl_max == 0:
            break
    punctuation = ['!', '.', '?']
    # line always starts with the separating space, so index 1 is the first letter.
    phrase = line[1].upper() + line[2:] + random.choice(punctuation)
    print (phrase)
def main():
    """Print a caption and then three generated lines of 5, 7 and 5 syllables."""
    print('\nThere you can see one more perfect creation:\n')
    for syllables in (5, 7, 5):
        line_creator(syllables)


if __name__ == '__main__':
    main()
import os
import re
# Counts file names whose part before the first '.' is exactly five Latin
# letters and prints the distinct names.
# NOTE(review): `files`, `i` and `name_base` are not defined anywhere in the
# visible text, and the nesting of the statements below cannot be recovered
# from the flattened layout - the setup lines appear to be missing.
# TODO restore from the original script.
for item in files:
file_name=item.split('.')
if len(file_name[0])==5:
lat=re.search('[A-Za-z]{5}', file_name[0])
if lat!=None:
i+=1
if file_name[0] not in name_base:
name_base.append(file_name[0])
print ('Число файлов с названием из пяти латинских символов: ',i)
print ('\nСписок названий найденных файлов (без повторов):')
for el in name_base:
print (el)
import os
# Find the letter that folder names under the current directory start with most often.
dirlist = [el for root, dirs, files in os.walk('.') for el in dirs]
stat = {}
letters = 'qwertyuiopasdfghjklzxcvbnmйцукенгшщзхъфывапролджэячсмитьбю'
# Keep only first characters that are letters. The earlier version removed
# items from the list while looping over it (skipping the following item) and
# still counted the rejected character.
letter = [name[0].lower() for name in dirlist if name[0].lower() in letters]
for el in letter:
    stat[el] = stat.get(el, 0) + 1
i = 0
res = 0
for value in stat:
    if stat[value] > i:
        i = stat[value]
        res = value
if i == 0:
    print ('Названий, начинающихся с букв, похоже, тут нет :(')
else:
    print('Чаще всего названия папок начинаются с буквы:', res, '\nТакие названия встречаются', i, 'раз(a)')
# Print every cyclic rotation of the entered word, one per line.
word = input('Введите слово: ')
if word:
    # The old in-loop `break` could never fire (i never exceeds len(word) - 1).
    for i in range(len(word)):
        print (word[i:] + word[:i])
else:
    print ('Нет входных данных')


# The definition below was fused onto the end of the previous statement.
def read_words():
    """Return all whitespace-separated tokens of ``austen.txt`` (UTF-8) in file order."""
    wordlist = []
    with open('austen.txt', 'r', encoding='UTF-8') as file:
        for line in file:
            wordlist.extend(line.split())
    return wordlist
def counter(part):
    """Return how many tokens from ``read_words()`` have ``part`` as their last ``len(part)`` characters."""
    tail = len(part)
    return sum(1 for word in read_words() if word[-tail:] == part)
print ('Число форм в данном тексте, оканчивающихся на -ed: ',counter('ed'))
print ('Из них - правильные глаголы в прошедшем времени на -y:',counter('ied'))
# The import below was fused onto the end of the previous statement.
import re
def file_name():
    """Print the instructions and return the file name typed by the user."""
    print ('Поместите файл в одну папку с данной программой.\nВведите имя файла, чтобы получить список cфер деятельности данного ученого:')
    return input()
def reader():
    """Return the lines of the file named by ``file_name()``, newline characters stripped from both ends."""
    with open(file_name(), 'r', encoding='UTF-8') as source:
        return [line.strip('\n') for line in source]
def str_sphere():
    """Return the line two positions below the first line containing 'Научная сфера:'.

    Returns:
        That line from ``reader()``, or an empty string when the label is
        absent or fewer than two lines follow it.
    """
    infobox = reader()
    for q, line in enumerate(infobox):
        if 'Научная сфера:' in line:
            # Guard against a label sitting in the last two lines, which
            # used to raise IndexError.
            if q + 2 < len(infobox):
                return infobox[q + 2]
            return ''
    return ''
# Starts by collecting '>'-prefixed lower-case Cyrillic runs from str_sphere().
# NOTE(review): the text of this function is damaged. The strip() call below
# receives an undefined name `i` as a second argument, and the following lines
# use `num_ana`, `num_w`, `form1` and `form2` without any visible
# initialisation - it looks like the end of this function and the start of
# another one were lost. TODO restore from the original script.
def main():
form=re.findall('>[а-я -]+', str_sphere())
list=''
for el in form:
el=el.strip('>.+',i)
form2=re.findall('ana',i)
for el in form2:
num_ana+=1
if form1!=None:
num_w+=1
koef=num_ana/num_w
return koef
# Collects the gr="..." attribute value of every line from reader() and splits
# each value on commas.
# NOTE(review): the `keys=` line below is cut off inside a string literal and
# the statements after it (`nlist`, the X-bar printing) appear to belong to a
# different script; the text in between was lost. TODO restore from the
# original script.
def freq_dict_8():
d={}
list=[]
new_list=[]
for i in reader():
form=re.search('gr="(.+)"',i)
if form!=None:
list.append(form.group(1))
for el in list:
i = el.split(',')
new_list.append(i)
keys=[item[0].strip('=qwertyuiopasdfghjklzxcvbnm/<>" ') for item in new_list if item!='NUM=nom" /> 0 :
print('X'*nlist[i])
else:
print('')
i += 1
import re
def openfile_lines(fname):
    """Return the lines of the UTF-8 file ``fname``, line endings included."""
    with open(fname, 'r', encoding = 'utf-8') as source:
        return list(source)
# Extracts lemma, type and word form from strings carrying lemma="...",
# type="..." and >form< parts.
# NOTE(review): as written, the body indexes the freshly created empty list
# `words`, appends to and returns `pure`, which is never defined here, and uses
# `lines` only for its length. The caller also expects a separate
# purify_info_about_words() that is not defined in the visible text, so two
# functions seem to have been merged when text was lost. TODO restore from the
# original script.
def find_words(lines):
words = []
for i in range(len(lines)):
if re.search('(.+?)<', words[i]):
found_lemma = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(1)
found_type = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(2)
found_form = re.search('lemma="(.+?)".*?type="(.+?)".*?>(.+?)<', words[i]).group(3)
pure.append([found_lemma, found_type, found_form])
return pure
def count_forms(words):
    """Count how often each ``type="..."`` value occurs in ``words``.

    Args:
        words: iterable of strings, each expected to contain a ``type="..."``
            attribute.

    Returns:
        dict mapping each type value to its number of occurrences. Strings
        without a ``type`` attribute are skipped instead of raising.
    """
    freq = {}
    for entry in words:
        found = re.search('type="(.+?)"', entry)
        if found is None:
            # Previously this crashed with AttributeError on .group().
            continue
        form = found.group(1)
        freq[form] = freq.get(form, 0) + 1
    return freq
def plural_adjectives(freqs):
    """Return the frequencies of the forms in which the pattern ``l.f.*`` is found.

    The matched text (not necessarily the whole key) is looked up in ``freqs``.
    """
    matched = []
    for form in list(freqs.keys()):
        found = re.search('l.f.*', form)
        if found and found.group():
            matched.append(found.group())
    pluradj_freq = {}
    for adj_form in matched:
        pluradj_freq[adj_form] = freqs[adj_form]
    return pluradj_freq
def main():
    """Process ``dict.txt`` and write the line count, word forms, selected frequencies and a CSV table."""
    source_lines = openfile_lines('dict.txt')
    found_words = find_words(source_lines)
    rows = purify_info_about_words(found_words)
    form_counts = count_forms(found_words)
    adjective_counts = plural_adjectives(form_counts)

    with open('lines.txt', 'w', encoding = 'utf-8') as out:
        out.write(str(len(source_lines)))

    with open('word forms.txt', 'w', encoding = 'utf-8') as out:
        out.write('\n'.join(form_counts.keys()))

    with open('plural adjectives frequencies.txt', 'w', encoding = 'utf-8') as out:
        report = ''
        for form, amount in adjective_counts.items():
            report += str(form) + ' ' + str(amount) + '\n'
        out.write(report)

    with open('dictionary.csv', 'w', encoding='utf-8') as out:
        out.write(','.join(['лемма', 'грамматическая форма', 'словоформа']) + '\n')
        for row in rows:
            out.write(','.join(row) + '\n')


if __name__ == '__main__':
    main()
import csv
def main():
    """Run a console quiz built from ``clues.csv``.

    The first column of every row is shown as a prompt and the second column
    is the expected answer. For each prompt the user gets ``len(prompt) + 1``
    attempts; after the last failed attempt the right answer is printed.
    """
    clues = {}
    with open('clues.csv', 'r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f, delimiter=','):
            clues[row[0]] = row[1]
    for clue, answer in clues.items():
        attempts = len(clue) + 1
        # A fresh attempt counter per clue: previously, running out of attempts
        # moved on to the next clue without leaving the inner loop, which kept
        # the old counter and raised IndexError after the last clue.
        for used in range(1, attempts + 1):
            response = input(clue + '...')
            if response == answer:
                print('Правильно!')
                break
            left = attempts - used
            if left:
                # Reports the attempts that really remain (was one too many).
                print('Неправильно. У тебя ещё ' + str(left) + ' попыток.')
            else:
                print('У тебя закончились попытки. Правильный ответ: ' + clue + ' ' + answer)


if __name__ == '__main__':
    main()
# Echo up to n entered words, stopping early (without echoing) on the sentinel word.
n = int(input('Введите целое положительное число.'))
index = 0
for index in range(1, n + 1):
    word = input('Введите слово.')
    if word == 'программирование':
        break
    print(word)
# Print the letters of the word from last to first, skipping 'з' and 'я'.
word = input('Введите слово в русской раскладке.')
index = 0
for index in range(1, len(word) + 1):
    letter = word[-index]
    if letter not in ('з', 'я'):
        print(letter)
import os
import re
import csv
def open_file_texts(directory):
    """Read every file under ``directory`` as windows-1251 text.

    Returns:
        dict mapping each bare file name to its text; a later file with the
        same name replaces an earlier one.
    """
    texts = {}
    for folder, _subfolders, names in os.walk(directory):
        for name in names:
            with open(os.path.join(folder, name), 'r', encoding='windows-1251') as source:
                texts[name] = source.read()
    return texts
def get_sentences(text):
    """Return everything ``re.findall`` captures for the sentence pattern in ``text``."""
    # NOTE(review): with this pattern every match is a single character, so the
    # result is a list of characters; the delimiters that originally surrounded
    # the group were presumably lost from the literal - TODO confirm.
    sentence_pattern = re.compile('(.|\n)+?')
    return sentence_pattern.findall(text)
def write_out_count_sentences(file_texts_dict):
    """Write, for every file name in the dict, the name, a tab and the number of items ``get_sentences`` finds."""
    with open('amount of sentences.txt', 'w', encoding='utf-8') as out:
        for filename, text in file_texts_dict.items():
            amount = len(get_sentences(text))
            out.write(filename + '\t' + str(amount) + '\n')
# Builds a list of [word, punctuation, analysis] records from the raw markup.
# NOTE(review): several string literals here are empty - strip('') is a no-op
# and split('') raises ValueError (empty separator) - so the markers that were
# originally inside them appear to have been lost, along with part of the
# findall pattern. `raw_lines` is computed but never used.
# TODO restore from the original script.
def get_words(raw_text):
word_list = []
raw_lines = raw_text.split()
word_lines = re.findall('(.+?)((?:\n?[«»,.! \?\-])*)', raw_text)
for i in range(len(word_lines)):
line = word_lines[i][0].strip('').strip('')
ana, word = line.split('')
ana = ana.strip('>').strip().strip('ana').strip()
word_list.append([word] + [word_lines[i][1].strip().strip(' ')] + [ana])
return word_list
def create_clear_text_out_of_words(word_list):
    """Turn word records into printable tokens (word plus its trailing punctuation).

    Args:
        word_list: records shaped ``[word, punctuation, analysis]``, the shape
            ``get_words`` appends.

    Returns:
        list of strings, one per record: the word followed by ' «' when the
        punctuation contains '«', by the leading digits of the punctuation
        when it starts with digits, or by the punctuation itself otherwise.
    """
    text = []
    for word in word_list:
        # Punctuation lives at index 1 of these records; index 2 is the
        # grammatical analysis, which the earlier version inspected by mistake.
        punctuation = word[1]
        digits = re.match(r'\d+', punctuation)
        if '«' in punctuation:
            text.append(word[0] + ' «')
        elif digits:
            text.append(word[0] + ' ' + digits.group(0) + ' ')
        else:
            text.append(word[0] + punctuation + ' ')
    return text
# Builds a [filename, author, topic] row for every text in the dict.
# NOTE(review): both search patterns are empty strings. An empty pattern always
# matches and has no capture group, so `.group(1)` raises IndexError - the
# original patterns appear to have been lost from the literals.
# TODO restore from the original script.
def find_file_meta (file_texts_dict):
file_meta_list = []
for filename in file_texts_dict:
text = file_texts_dict[filename]
author = re.search('', text)
if author:
author = re.search('', text).group(1)
topic = re.search('', text)
if topic:
topic = re.search('', text).group(1)
file_meta_list.append([filename, author, topic])
return file_meta_list
def write_out_file_meta(file_meta_list):
    """Write the metadata rows to 'file metadata.csv' as a ';'-separated table with a header.

    Args:
        file_meta_list: iterable of rows (file name, author, topic).
    """
    # newline='' lets the csv module control line endings; without it extra
    # blank rows appear on platforms that translate '\n'.
    with open('file metadata.csv', 'w', encoding='utf-8', newline='') as n:
        text = csv.writer(n, delimiter=';')
        text.writerow(['Название файла', 'Автор', 'Тематика текста'])
        text.writerows(file_meta_list)
def find_spec_bigr_in_sentence(word_list):
    """Return 'previous current' pairs where the current record's item 2 contains 'loc' and the previous one's contains 'PR'."""
    pairs = zip(word_list, word_list[1:])
    return [
        previous[0] + ' ' + current[0]
        for previous, current in pairs
        if 'loc' in current[2] and 'PR' in previous[2]
    ]
def find_all_spec_bigr(raw_texts_dict):
    """Collect the special bigrams of every sentence of every text, each with its sentence.

    Args:
        raw_texts_dict: dict whose values are the raw texts to analyse.

    Returns:
        list of ``[bigram, context]`` pairs, where ``context`` is the readable
        sentence as one string.
    """
    spec_bigr = []
    for text in raw_texts_dict.values():
        for sentence in get_sentences(text):
            sentence_word_list = get_words(sentence)
            sentence_spec_bigr = find_spec_bigr_in_sentence(sentence_word_list)
            # Join the tokens into one string: the writer concatenates the
            # context with str, which raised TypeError for a list.
            context = ''.join(create_clear_text_out_of_words(sentence_word_list))
            for bigr in sentence_spec_bigr:
                spec_bigr.append([bigr, context])
    return spec_bigr
def write_out_spec_bigr(spec_bigr):
    """Write each pair to 'bigrams.txt' as its first item, a tab, its second item and a newline."""
    with open('bigrams.txt', 'w', encoding='utf-8') as out:
        for pair in spec_bigr:
            out.write(pair[0] + '\t' + pair[1] + '\n')
def main():
    """Read the texts under ``news`` and write the sentence counts, the metadata table and the bigram list."""
    texts = open_file_texts('news')
    write_out_count_sentences(texts)
    write_out_file_meta(find_file_meta(texts))
    write_out_spec_bigr(find_all_spec_bigr(texts))


if __name__ == '__main__':
    main()
# Print every line of words.txt that contains ' союз '.
words = []
with open('words.txt','r', encoding = 'utf-8') as f:
    text = f.read()
words = text.split('\n')
for entry in words:
    if ' союз ' in entry:
        print(entry)
# Print every words.txt line containing both 'сущ' and 'жен', then the sum of
# the third '|'-separated field of those lines.
words = []
with open('words.txt','r', encoding = 'utf-8') as f:
    text = f.read()
words = text.split('\n')
feminin = []
ipm = 0
word = ''
gram = ''
ipmi = ''
for entry in words:
    if 'сущ' in entry and 'жен' in entry:
        feminin.append(entry)
        word, gram, ipmi = entry.split('|')
        ipm += float(ipmi)
for entry in feminin:
    print(entry + ',')
print(ipm)
# Ask for words until an empty line, then look each one up in words.txt and
# print the second and third '|'-separated fields of every matching line.
words = []
with open('words.txt','r', encoding = 'utf-8') as f:
    text = f.read()
words = text.split('\n')
words1 = []
word = input('Print any russian word. ')
while word:
    words1.append(word)
    word = input('Print any russian word. ')
for query in words1:
    check = 0
    for entry in words:
        # Only lines with exactly three fields can be unpacked.
        if entry.count('|') == 2:
            word, gram, ipmi = entry.split('|')
            if query == word.strip(' '):
                print('grammar:', gram.strip(' ')+',' , 'ipm =', float(ipmi))
                check = 1
    if check == 0:
        print('This word was not find in the dictionary.')
# Print every cyclic rotation of the entered text, one per line.
text = input('Type something: ')
for i, _char in enumerate(text):
    print(text[i:] + text[:i])
import re
def match_verb_forms(line):
    """Match a form of the verb 'программировать' at the start of ``line``.

    The patterns are tried in a fixed order, case-insensitively, and the
    first hit wins.

    Args:
        line: one word, or two words separated by a space.

    Returns:
        The ``re.Match`` object of the first matching pattern, or ``None``.
    """
    patterns = (
        r'программировать(ся)?',                                            # infinitive
        r'буд(е(шь|те?|м)|ут?) программировать',                            # future
        # The present participle must be tried before the present tense:
        # the present pattern matches the prefix 'программируем' and used to
        # make the participle branch unreachable.
        r'программируем(ая|о(е|й|му?|го)|ы(й|е|ми?|х))',                    # present participle
        r'программиру(ю|(е(те?|м|шь)))',                                    # present
        r'программировал(а|и)?',                                            # past
        r'программированн(ая|о(е|й|му?|го)|ы(й|е|ми?|х))',                  # past participle
        r'программируя',                                                    # transgressive, active
        r'будучи программированн(ая|о(е|й|му?|го)|ы(й|е|ми?|х))',           # transgressive, passive past
        r'будучи программируем(ая|о(е|й|му?|го)|ы(й|е|ми?|х))',             # transgressive, passive present
    )
    for pattern in patterns:
        match = re.match(pattern, line, re.I)
        if match:
            return match
    return None
def open_forms(fname):
    """Return the lower-cased words of the UTF-8 file ``fname`` with ``.,?*()«»`` stripped from both ends."""
    with open (fname, 'r', encoding = 'utf-8') as source:
        lowered = source.read().lower()
    return [token.strip('.,?*()«»') for token in lowered.split()]
def main():
    """Print the distinct forms of the verb found in ``test.txt``, in order of first appearance."""
    matches = []
    forms = open_forms('test.txt')
    for i, form in enumerate(forms):
        # Pair each word with its successor so the two-word forms can match;
        # the last word has no successor and is checked on its own (it used to
        # be skipped, and the single-word branch was unreachable).
        candidate = form + ' ' + forms[i + 1] if i + 1 < len(forms) else form
        match = match_verb_forms(candidate)
        if match and match.group() not in matches:
            matches.append(match.group())
    print(*matches)


if __name__ == '__main__':
    main()
import re
import os
import shutil
# Lists the entries of the current working directory, takes the part of each
# name before the first '.', and flags the name as Cyrillic unless some
# character fails the [А-Яа-яЁё] check; cfcount counts flagged entries that
# are directories and clist collects names without repeats.
# NOTE(review): the flattened layout does not show whether the clist update is
# nested under the isdir() check or only under the Cyrillic check, and an
# empty name part (e.g. a dot-file) keeps the flag set - TODO confirm intent.
flist = os.listdir(os.getcwd())
clist = []
cfcount = 0
for n in flist:
cyrillic = 1
name = n.split('.')[0]
for let in name:
if not re.match('[А-Яа-яЁё]',let):
cyrillic = 0
if cyrillic == 1:
if os.path.isdir(n):
cfcount += 1
if name not in clist:
clist.append(name)
print(cfcount)
print(clist)
import re
import csv
def open_file(name):
    """Return the full text of the UTF-8 file ``name``."""
    with open(name, 'r', encoding='utf-8') as source:
        return source.read()
# Builds one record per word: [form, number of analyses, punctuation, analyses...].
# NOTE(review): the `line = ...` statement below is cut off inside a string
# literal (the text between the split() argument and ` 0:` is missing), so the
# loop that cleaned the analyses cannot be recovered from here. `raw_lines` is
# computed but never used. TODO restore from the original script.
def get_words(raw_text):
word_arr = []
raw_lines = raw_text.split()
word_lines = re.findall('(.+)((?:\n?[«»,.! \?\-])*(?:\n?[01234567])*)', raw_text)
for i in range(len(word_lines)):
line = word_lines[i][0].strip('').strip('').split(' 0:
line[e] = line[e].strip(' />')
word_arr.append([line[0]] + [len(line)-1] + [word_lines[i][1].strip().strip(' ')] + line[1:])
return word_arr
def count_average_anas(word_arr):
    """Return the mean of item 1 (the number of analyses) over all word records.

    Args:
        word_arr: list of records whose item 1 is a number.

    Returns:
        The arithmetic mean, or 0 for an empty list.
    """
    if not word_arr:
        # Avoid dividing by zero when there are no records.
        return 0
    return sum(record[1] for record in word_arr) / len(word_arr)
def count_all_pos(word_arr):
    """Count the first tag of every ``gr="..."`` analysis and write the counts to a file.

    Args:
        word_arr: list of records; items from index 3 on are analysis strings.

    Returns:
        dict mapping each tag (the word characters right after ``gr="``) to
        its count. The same data is written to
        'parts of speech frequency.txt' as tab-separated lines.
    """
    pos_dict = {}
    for record in word_arr:
        for analysis in record[3:]:
            found = re.search(r'gr="(\w+)', analysis)
            if found is None:
                # An analysis without a gr attribute used to raise AttributeError.
                continue
            pos = found.group(1)
            pos_dict[pos] = pos_dict.get(pos, 0) + 1
    with open('parts of speech frequency.txt', 'w', encoding='utf-8') as f:
        for pos, amount in pos_dict.items():
            f.write(pos + '\t' + str(amount) + '\n')
    return pos_dict
def make_text(word_arr):
    """Turn word records into printable tokens using item 0 (the form) and item 2 of each record."""
    tokens = []
    for record in word_arr:
        form, tail = record[0], record[2]
        digits = re.match('\d+', tail)
        if '«' in tail:
            token = form + ' «'
        elif digits:
            token = form + ' ' + digits.group(0) + ' '
        else:
            token = form + tail + ' '
        tokens.append(token)
    return tokens
def find_all_instr(word_arr, text):
    """Find words with an 'ins' analysis and write them out with up to three tokens of context per side.

    Args:
        word_arr: list of word records; item 0 is the form and every item from
            index 2 on is a string searched for 'ins'.
        text: list of printable tokens indexed like ``word_arr``.

    Returns:
        dict mapping each matching form to the list of its positions in
        ``word_arr``. Each position is also written to
        'words in instrumentalis.txt' as ``left context<TAB>word<TAB>right context``.
    """
    instr_words_dict = {}
    for n, word in enumerate(word_arr):
        for analysis in word[2:]:
            if re.search('ins', analysis):
                positions = instr_words_dict.setdefault(word[0], [])
                if n not in positions:
                    positions.append(n)
                    print(positions)
    with open('words in instrumentalis.txt', 'w', encoding='utf-8') as f:
        for word, positions in instr_words_dict.items():
            for x in positions:
                # Clamp the left edge: a negative start index wraps around and
                # used to lose the context of the first three words.
                left = ''.join(text[max(x - 3, 0):x])
                # Slices stop at the end on their own; the old upper bound of
                # len(text) - 1 dropped the final token.
                right = ''.join(text[x + 1:x + 4])
                f.write(left + '\t' + word + '\t' + right + '\n')
    return instr_words_dict
def main():
    """Parse ``text.xml``, print the average number of analyses and write the two report files."""
    records = get_words(open_file('text.xml'))
    print(count_average_anas(records))
    count_all_pos(records)
    find_all_instr(records, make_text(records))


if __name__ == '__main__':
    main()
import re
def open_text_phrases(fname):
    """Split the UTF-8 file ``fname`` into phrases ending in '...', '.', '?' or '!'.

    Text after the last terminator is dropped; markup and quote characters
    are removed and each phrase is stripped of surrounding whitespace.
    """
    with open (fname, 'r', encoding = 'utf-8') as source:
        content = source.read()
    # Normalise every terminator to '!' so a single split is enough.
    unified = re.sub('\.\.\.|[\.\?]', '!', content)
    pieces = unified.split('!')[:-1]
    return [re.sub('[<>\*\.«»,\'\"]', '', piece).strip() for piece in pieces]
def main():
    """Print every word of ``text.txt`` as ``word_length``, one per line."""
    template = '{}_{}'
    for phrase in open_text_phrases('text.txt'):
        for token in phrase.split():
            print(template.format(token, len(token)))


if __name__ == '__main__':
    main()
import re
import csv
def openforms(text):
    """Return the lower-cased whitespace-separated tokens of ``text`` with punctuation stripped from both ends."""
    return [token.strip('.,?*()«»!\'\":; ') for token in text.lower().split()]
def freqlist(forms):
    """Return a dict mapping every item of ``forms`` to its number of occurrences."""
    counts = {}
    for form in forms:
        counts[form] = counts.get(form, 0) + 1
    return counts
def freqlist_to_csv(freqs):
    """Write the frequency dict to ``freq.csv`` with a header, rows sorted by word.

    Args:
        freqs: dict mapping words to counts.
    """
    # newline='' lets the csv module control line endings; without it extra
    # blank rows appear on platforms that translate '\n'.
    with open('freq.csv', 'w', encoding='utf-8', newline='') as f:
        output = csv.writer(f, delimiter=',')
        output.writerow(['слово', 'частота'])
        for key in sorted(freqs):
            output.writerow([key, freqs[key]])
def agosforms(text):
    """Write every fragment of ``text`` matched by the '-аго' context pattern to ``agos.txt``, one per line."""
    context_pattern = '(?:(?:[А-Яа-яіѢѣЁё])+[\s,.!\?:;"\(\)\'»\n\t—]+?){3}[А-Яа-яiѢѣ]+?аго [А-Яа-яiѢѣ]+?(?:а|и)[\s,.!\?:;"\(\)\'»\n\t—]{,5}(?:[А-Яа-яiѢѣ]+?[\s,.!\?;:—"\(\)\'»\n\t]+?){3}'
    fragments = re.findall(context_pattern, text)
    with open('agos.txt', 'w', encoding='utf-8') as out:
        out.write('\n'.join(fragments))
def main():
    """Read 'Лесков.txt', print its number of word forms and write the frequency table and the '-аго' contexts."""
    with open ('Лесков.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()
    forms = openforms(text)
    print(len(forms))
    freqlist_to_csv(freqlist(forms))
    agosforms(text)


if __name__ == '__main__':
    main()
import re
def main():
    """Read ``cats.txt``, apply the cat/dog substitutions in a fixed order and print the result."""
    with open('cats.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()
    # Order matters: dog words are first replaced by a placeholder so that the
    # cat-to-dog step does not touch them, then the placeholders become cat words.
    substitutions = (
        ('([Сс]обак(?:а(?:х|ми?)?|и|е|у|о(?:й|ю))?)([\s,.!\?:"\(\)\'»\n\]\[-])', '<<<тут было слово \\1>>> \\2'),
        ('([\s,.!\?:"\(\)\'«\n-])коше?к(а(?:х|ми?)?|и|е|у|о(?:й|ю))?([\s,.!\?:"\(\)\'»\n-\]\[])', '\\1собак\\2\\3'),
        ('([\s,.!\?:"\(\)\'«\n-])Коше?к(а(?:х|ми?)?|и|е|у|о(?:й|ю))?([\s,.!\?:"\(\)\'»\n-\]\[])', '\\1Собак\\2\\3'),
        ('<<<тут было слово собак(а(?:х|ми?)?|и|е|у|о(?:й|ю))>>>', 'кошк\\1'),
        ('<<<тут было слово собак>>>', 'кошек'),
        ('<<<тут было слово Собак(а(?:х|ми?)?|и|е|у|о(?:й|ю))>>>', 'Кошк\\1'),
        ('<<<тут было слово Собак>>>', 'Кошек'),
        ('кошач(ь(?:и(?:ми?|х)?|е(?:му|го|й)|я|ю)?|ий)', 'собач\\1'),
        ('Кошач(ь(?:и(?:ми?|х)?|е(?:му|го|й)|я|ю)?|ий)', 'Собач\\1'),
        ('котята', 'щенята'),
        ('Котята', 'Щенята'),
        ('кот(?:е|ё)н(ок|ку)', 'щен\\1'),
        ('Кот(?:е|ё)н(ок|ку)', 'Щен\\1'),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    print(text)


if __name__ == '__main__':
    main()
import re
def main():
    """Print all ``dd.mm.yy`` dates found in ``dates.txt``, separated by spaces.

    A date is a day 01-31, a month 01-12 and a two-digit year, not adjacent to
    other digits.
    """
    with open('dates.txt', 'r', encoding = 'utf-8') as f:
        text = f.read()
    # The earlier alternation required a third digit after 30/31 (so 30.01.99
    # was missed) and accepted months 13-19; the lookarounds keep a date from
    # being cut out of a longer digit run such as a four-digit year.
    date_pattern = r'(?<!\d)(?:0[1-9]|[12][0-9]|3[01])\.(?:0[1-9]|1[0-2])\.[0-9]{2}(?!\d)'
    dates = re.findall(date_pattern, text)
    print(*dates)


if __name__ == '__main__':
    main()
import re
def main():
    """Lower-case ``aphasy.txt``, collapse immediately repeated fragments until nothing changes, and print the result."""
    with open('aphasy.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()
    text = text.lower()
    repeat_pattern = '(\w+)(?:,?|\.*?) \\1'
    clear = re.sub(repeat_pattern, '\\1', text)
    # Keep collapsing until a pass leaves the text unchanged.
    while True:
        reduced = re.sub(repeat_pattern, '\\1', clear)
        if reduced == clear:
            break
        clear = reduced
    print(clear)


if __name__ == '__main__':
    main()
import re
def three_consonants(text):
    """Return the fragments of ``text`` that contain three Cyrillic consonants in a row.

    Each fragment runs up to the next delimiter; the delimiter itself is
    stripped from the result.
    """
    pattern = r'[^\s,.!\?:"\(\)\'«»\nйцкнгшщзхфвпрлджчсмтб]*?[йцкнгшщзхфвпрлджчсмтб]{3}[^\s,.!\?:"\(\)\'«»\nйцкнгшщзхфвпрлджчсмтб]*?[^\s,.!\?:"\(\)\'«»\n]*?[\s,.!\?:"\(\)\'»\n]'
    # str.strip takes literal characters, not a regex: the old argument also
    # stripped backslashes and the letter 's' from the ends of the fragments.
    edge_chars = ' \t\n\r\f\v,.!?:"()\'»'
    return [found.strip(edge_chars) for found in re.findall(pattern, text, re.I)]
def startwith(text):
    """Return the fragments of ``text`` that start at a word boundary with а/о followed by б/в.

    Each fragment runs up to the next delimiter; delimiters are stripped from
    both ends of the result. Matching is case-insensitive.
    """
    found = re.findall(r'\b(?:а|о)(?:б|в).+?[\s,.!\?:"\(\)\'»\n]', text, re.I)
    # str.strip takes literal characters, not a regex: the old argument also
    # stripped backslashes and the letter 's' from the ends of the fragments.
    edge_chars = ' \t\n\r\f\v,.!?:"()\'»'
    return [fragment.strip(edge_chars) for fragment in found]
def proper_nouns(text):
    """Return capitalised words of ``text`` that follow a lower-case letter or digit and a space.

    Only words followed by a delimiter are found; the delimiter is stripped.
    """
    found = re.findall(r'[а-яёa-z0-9] [А-ЯЁA-Z][а-яёa-z]+?[\s,.!\?:"\(\)\'»\n]', text)
    # str.strip takes literal characters, not a regex: the old argument also
    # stripped a trailing 's', turning e.g. 'Paris' into 'Pari'.
    edge_chars = ' \t\n\r\f\v,.!?:"()\'»'
    # Each match is 'x Word<delimiter>'; the second token is the word itself.
    return [fragment.split()[1].strip(edge_chars) for fragment in found]
def analytical_future(text):
    """Return every fragment of ``text`` that matches a form of 'буд-' followed by text ending in -ать/-еть/-ить(ся)."""
    future_pattern = 'буд(?:е(?:шь|те?|м)|ут?) .+?(?:а|е|и)ть(?:ся)?'
    return re.findall(future_pattern, text, re.I)
def polysyllabic(text):
    """Return the words of ``text`` that start with at least five consonants-plus-vowel groups and are followed by a delimiter."""
    found = re.findall(r'\b(?:[йцкнгшщзхфвпрлджчсмтб]*?[уеыаоюяиэ]){5,}[а-я]*?[\s,.!\?:"\(\)\'»\n]', text)
    return [fragment.strip('\s,.!\?:"\(\)\'«»\n\t ') for fragment in found]
def roman_num(text):
    """Return the non-empty whitespace-delimited runs of ``text`` that match the Roman-numeral pattern."""
    candidates = re.findall('\sC?M*?C?D?L?C{,4}X?L?I?X{,4}I?V?I{,4}\s', text)
    stripped = (candidate.strip('\s,.!\?:"\(\)\'«»\n\t ') for candidate in candidates)
    # The pattern also matches two adjacent whitespace characters; those strip to ''.
    return [numeral for numeral in stripped if numeral]
def main():
    """Read ``text.txt`` (UTF-8) into memory; nothing further is done with the text here."""
    with open('text.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()


if __name__ == '__main__':
    main()
import re
# Strips markup from an HTML string step by step and collapses whitespace.
# NOTE(review): the first two patterns are empty strings (so those
# substitutions change nothing) and the third starts with a stray ']' - the
# script/style/span patterns appear to have been lost from the literals.
# TODO restore from the original script.
def clean(html):
noscript = re.sub('', '', html)
nostyle = re.sub('', '', noscript)
nospan = re.sub(']*?>[^<>]*?', '', nostyle)
# Remove whatever tags remain, then {...} blocks.
notags = re.sub('<[^>]*>', '', nospan)
notags1 = re.sub('{[^}]*}', '', notags)
# Replace &...; entities with a space and collapse runs of whitespace.
text = re.sub('[&][^;]*;', ' ', notags1)
text = re.sub(r'\s+', ' ', text)
return text
def html(text):
    """Return every ``<...>`` tag found in ``text``, in order."""
    tag_pattern = re.compile(r'<[^>]*?>')
    return tag_pattern.findall(text)
def main():
    """Read ``schizo.txt`` and write its tags to ``html.txt`` and its cleaned text to ``pure.txt``."""
    with open('schizo.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()
    with open('html.txt', 'w', encoding = 'utf-8') as tags_out:
        tags_out.write('\n'.join(html(text)))
    with open('pure.txt', 'w', encoding = 'utf-8') as text_out:
        text_out.write(clean(text))


if __name__ == '__main__':
    main()
import re
def main():
    """Ask for a phone number, report whether it fits the +7 (XXX) XXX-XX-XX template and name the operator."""
    given = input('Введите свой телефонный номер: ')
    verdict = 'Я не могу точно сказать, какой это оператор.'
    if re.search('\+7 \([0-9]{3}\) [0-9]{3}-[0-9]{2}-[0-9]{2}', given):
        print('Введённый номер совпадает с шаблоном +7 (ХХХ) ХХХ-ХХ-ХХ.')
        # With the template confirmed only the bracketed code is inspected.
        strict_checks = (
            ('\(9(?:2|3)', 'Это Мегафон.'),
            ('\(9(?:1|8)', 'Это МТС.'),
            ('\(96', 'Это Билайн.'),
        )
        for bracketed, operator in strict_checks:
            if re.search(bracketed, given):
                verdict = operator
                break
    else:
        print('Введённый номер не совпадает с шаблоном +7 (ХХХ) ХХХ-ХХ-ХХ.')
        # Otherwise also accept the code right after '+7' or after a leading '8'.
        loose_checks = (
            ('\(9(?:2|3)', '\+7 ?9(?:2|3)', '8 ?9(?:2|3)', 'Это Мегафон.'),
            ('\(9(?:1|8)', '\+7 ?9(?:1|8)', '8 ?9(?:1|8)', 'Это МТС.'),
            ('\(96', '\+7 ?96', '8 ?96', 'Это Билайн.'),
        )
        for bracketed, after_plus, after_eight, operator in loose_checks:
            if re.search(bracketed, given) or re.search(after_plus, given) or re.match(after_eight, given):
                verdict = operator
                break
    print(verdict)


if __name__ == '__main__':
    main()
import re
def revert(dictionary):
    """Return a new dict with keys and values swapped; for repeated values the last key wins."""
    return {value: key for key, value in dictionary.items()}
def russian_to_latin_dictionary(lines):
    """Build a dict from each comma-separated right-hand variant to the left-hand side of ``left — right`` lines."""
    raw = {}
    for line in lines:
        parts = line.split(' — ')
        raw[parts[0]] = parts[1].strip('\n')
    # Swap sides so the right-hand text becomes the key.
    raw = {translation: latin for latin, translation in raw.items()}
    rus_to_lat = {}
    for key, latin in raw.items():
        variants = key.split(',')
        if len(variants) > 1:
            # Variants are entered starting from the last one, then first to last-but-one.
            for variant in variants[-1:] + variants[:-1]:
                rus_to_lat[variant.strip()] = latin
        else:
            rus_to_lat[key] = latin
    return rus_to_lat
def latin_to_russian_dictionary(lines):
    """Build a dict from each comma-separated left-hand variant to the right-hand side of ``left — right`` lines."""
    raw = {}
    for line in lines:
        parts = line.split(' — ')
        raw[parts[0]] = parts[1].strip('\n')
    lat_to_rus = {}
    for key, translation in raw.items():
        variants = key.split(',')
        if len(variants) > 1:
            # Variants are entered starting from the last one, then first to last-but-one.
            for variant in variants[-1:] + variants[:-1]:
                lat_to_rus[variant.strip()] = translation
        else:
            lat_to_rus[key] = translation
    return lat_to_rus
def main():
    """Read ``latin.txt``, normalise dashes and semicolons, and print both dictionaries."""
    with open ('latin.txt', 'r', encoding = 'utf-8') as source:
        lines = source.readlines()
    normalised = []
    for line in lines:
        # Every dash variant becomes an em dash; semicolons become commas.
        line = re.sub('(?:–|−|-)', '—', line)
        normalised.append(re.sub(';', ',', line))
    print(latin_to_russian_dictionary(normalised))
    print(russian_to_latin_dictionary(normalised))


if __name__ == '__main__':
    main()
def process(fname):
    """Return the lower-cased words of the UTF-8 file ``fname`` with punctuation and brackets stripped from both ends."""
    with open (fname, 'r', encoding = 'utf-8') as source:
        lowered = source.read().lower()
    return [token.strip('.,!?*()«»\'":][><') for token in lowered.split()]
def freqlist(forms):
    """Return a dict mapping every item of ``forms`` to its number of occurrences."""
    tally = {}
    for form in forms:
        tally.setdefault(form, 0)
        tally[form] += 1
    return tally
def maxfreq(frequencies):
    """Return the keys of ``frequencies`` that share the highest value.

    Args:
        frequencies: dict mapping words to counts.

    Returns:
        list of keys with the maximum value, in dict order; empty for an
        empty dict.
    """
    if not frequencies:
        return []
    # Compute the maximum once instead of once per key.
    top = max(frequencies.values())
    return [key for key, value in frequencies.items() if value == top]
def averagefreq(frequencies):
    """Return the mean of the values of ``frequencies``.

    Args:
        frequencies: dict mapping words to counts.

    Returns:
        The arithmetic mean of the values, or 0 for an empty dict.
    """
    if not frequencies:
        # Nothing to average; avoid failing on an empty dict.
        return 0
    return sum(frequencies.values()) / len(frequencies)
def main():
    """Print the most frequent word(s) of ``text.txt`` and the average word frequency."""
    counts = freqlist(process('text.txt'))
    print(*maxfreq(counts), '- самое частотное слово в тексте.')
    print(averagefreq(counts), '- средняя частота слов в тексте.')


if __name__ == '__main__':
    main()
import random
def ask():
    """Ask six questions and return ``['name surname', [age, food, musician, dream]]``."""
    name = input('Как Вас зовут? ')
    surname = input('Какая у Вас фамилия? ')
    age = input('Сколько Вам лет? ')
    food = input('Какая у Вас любимая еда? ')
    musician = input('Какая у Вас любимая музыкальная группа? ')
    dream = input('Какая у Вас заветная мечта? ')
    return [name + ' ' + surname, [age, food, musician, dream]]
def guess(database_dictionary):
    """Pick a random person and clue, ask the user to name the person and return the verdict text."""
    person = random.choice(list(database_dictionary.keys()))
    facts = database_dictionary[person]
    dream_clue = 'его/её мечта: ' + facts[3]
    music_clue = 'его/её любимая музыкальная группа: ' + facts[2]
    food_clue = '\nего/её любимая еда: ' + facts[1]
    clue = random.choice([dream_clue, music_clue, dream_clue + food_clue, music_clue + food_clue])
    answer = input('Угадайте, кто это (имя и фамилию)? Подсказка: ' + clue + ' ')
    if answer == person:
        return 'Правильно!'
    return 'Нет, неправильно, это - ' + person
def main():
    """Collect seven questionnaires keyed by full name, then play one guessing round."""
    database = {}
    for _ in range(7):
        full_name, facts = ask()
        database[full_name] = facts
    print(guess(database))


if __name__ == '__main__':
    main()
import re
def main():
    """Copy 'Лингвистика.txt' to 'Новая лингвистика.txt' with forms of 'язык' replaced by forms of 'шашлык'."""
    with open('Лингвистика.txt', 'r', encoding = 'utf-8') as source:
        text = source.read()
    # Lower-case forms first, then capitalised ones; the ending and the
    # following delimiter are carried over unchanged.
    replacements = (
        ('язык((?:а(?:ми?|х)?)|и|о(?:в|м)|у|е)?([\s,.!\?:"\(\)\'»])', 'шашлык\\1\\2'),
        ('Язык((?:а(?:ми?|х)?)|и|о(?:в|м)|у|е)?([\s,.!\?:"\(\)\'»])', 'Шашлык\\1\\2'),
    )
    new_text = text
    for pattern, replacement in replacements:
        new_text = re.sub(pattern, replacement, new_text)
    with open('Новая лингвистика.txt', 'w', encoding = 'utf-8') as target:
        target.write(new_text)


if __name__ == '__main__':
    main()
import os
# Count file extensions under the current directory and report the most frequent one(s).
extension_frequency_list = {}
for root, dirs, files in os.walk('.'):
    for f in files:
        # splitext takes the last extension and copes with names that have no
        # dot at all; f.split('.')[1] raised IndexError for those and picked
        # the wrong part of names with several dots.
        file_name, file_ext = os.path.splitext(f)
        file_ext = file_ext.lstrip('.')
        if not file_ext:
            continue
        extension_frequency_list[file_ext] = extension_frequency_list.get(file_ext, 0) + 1
# max() fails on an empty dict, so report only when something was counted.
if extension_frequency_list:
    max_ext = max(extension_frequency_list.values())
    i = 0
    for key in extension_frequency_list:
        if extension_frequency_list[key] == max_ext:
            if i == 0:
                print('The most frequent extention is \''+key+'\'. There is(are) '+str(extension_frequency_list[key])+' file(s) with it.')
                i = 1
            else:
                print('There is(are) also '+str(extension_frequency_list[key])+' \''+key+'\' file(s).')
def opentext(fname):
    """Return the lower-cased words of the UTF-8 file ``fname`` with punctuation stripped from both ends."""
    with open (fname, 'r', encoding = 'utf-8') as source:
        lowered = source.read().lower()
    return [token.strip('.,!?*()«»\'"') for token in lowered.split()]
def adj_hood(fname):
    """Return the distinct words of the file that are longer than four characters and end in 'hood', in order of first appearance."""
    hoods = []
    for word in opentext(fname):
        if len(word) > 4 and word[-4:] == 'hood' and word not in hoods:
            hoods.append(word)
    return hoods
def count_frequency(fname, word):
    """Return how many words of the file are equal to ``word``."""
    return sum(1 for candidate in opentext(fname) if candidate == word)
def main():
    """Ask for a file name and report the '-hood' words: how many, the rarest ones and their stems."""
    fname = input('Введите имя файла: ')
    hoods = adj_hood(fname)
    print('В тексте встретилось', len(hoods), 'прилагательных с суффиксом -hood.')
    freq = [count_frequency(fname, hood) for hood in hoods]
    # min() is only evaluated when there is at least one word.
    min_freq = [hood for hood, amount in zip(hoods, freq) if amount == min(freq)]
    print('Самые редкие прилагательные с суффиксом -hood: ', ', '.join(min_freq))
    roots = [hood[0:-4] for hood in hoods]
    print('Корни прилагательных с суффиксом -hood: ', ', '.join(roots))


if __name__ == '__main__':
    main()
# Compare how many words of text.txt have length 3 and length 1.
words = []
with open('text.txt','r', encoding = 'utf-8') as f:
    text = f.read()
words_raw = text.split()
words = []
for chunk in words_raw:
    words.extend(chunk.split('\n'))
len3 = 0
len1 = 0
for word in words:
    size = len(word)
    if size == 3:
        len3 += 1
    elif size == 1:
        len1 += 1
if len1 == 0:
    print('В файле нет слов длины 1.')
elif len3 == 0:
    print('В файле нет слов длины 3.')
else:
    print('В файле в '+str(len3/len1)+' раз больше слов длины 3, чем слов длины 1.')
import random
def nom_noun():
    """Return a random line of ``nomnouns.txt`` (UTF-8, one entry per line).

    Blank entries, such as the one produced by a trailing newline, are
    ignored so that an empty string is never chosen.
    """
    with open('nomnouns.txt', 'r', encoding='utf-8') as f:
        nomnouns = [entry for entry in f.read().split('\n') if entry]
    return random.choice(nomnouns)
def acc_noun():
    """Return a random line of ``accnouns.txt`` (UTF-8, one entry per line).

    Blank entries (e.g. from a trailing newline) are skipped so the result
    is never an empty string.
    """
    with open('accnouns.txt', 'r', encoding='utf-8') as f:
        accnouns = [entry for entry in f.read().split('\n') if entry]
    return random.choice(accnouns)
def adverb():
    """Return a random line of ``adverbs.txt`` (UTF-8, one entry per line).

    Blank entries (e.g. from a trailing newline) are skipped.
    """
    with open('adverbs.txt', 'r', encoding='utf-8') as f:
        adverbs = [entry for entry in f.read().split('\n') if entry]
    return random.choice(adverbs)
def intensifier(adv):
    """Return *adv* preceded by a random line of ``intensifiers.txt`` and a space.

    Blank entries in the file (e.g. from a trailing newline) are skipped so
    the result never starts with a bare space.
    """
    with open('intensifiers.txt', 'r', encoding='utf-8') as f:
        intensifiers = [entry for entry in f.read().split('\n') if entry]
    return random.choice(intensifiers) + ' ' + adv
def verb_of_thought(subj):
    """Build ``"<subj> <verb>, что <transitive clause>."``.

    The verb is a random non-blank line of ``thoughtverbs.txt``; the embedded
    clause comes from ``trans_verb`` with freshly drawn nouns.
    """
    with open('thoughtverbs.txt', 'r', encoding='utf-8') as f:
        # Skip blank entries (trailing newline) so no empty verb is chosen.
        thoughtverbs = [entry for entry in f.read().split('\n') if entry]
    return subj + ' ' + random.choice(thoughtverbs) + ', что ' + trans_verb(nom_noun(), acc_noun()) + '.'
def trans_verb(subj, obj):
    """Build ``"<subj> <intensified adverb> <verb> <obj>"``.

    The verb is a random non-blank line of ``transverbs.txt``.
    """
    with open('transverbs.txt', 'r', encoding='utf-8') as f:
        # Skip blank entries (trailing newline) so no empty verb is chosen.
        transverbs = [entry for entry in f.read().split('\n') if entry]
    return subj + ' ' + intensifier(adverb()) + ' ' + random.choice(transverbs) + ' ' + obj
def trans_verb_negative(subj, obj):
    """Return a negated transitive sentence.

    Two variants are built, with ``не`` before the intensified adverb or
    before the verb, and one of them is returned at random. Verbs are the
    non-blank lines of ``transverbs.txt``.
    """
    with open('transverbs.txt', 'r', encoding='utf-8') as f:
        # Skip blank entries (trailing newline) so no empty verb is chosen.
        transverbs = [entry for entry in f.read().split('\n') if entry]
    negative_sentences = [
        subj + ' не ' + intensifier(adverb()) + ' ' + random.choice(transverbs) + ' ' + obj,
        subj + ' ' + intensifier(adverb()) + ' не ' + random.choice(transverbs) + ' ' + obj,
    ]
    return random.choice(negative_sentences)
def verb_of_thought_negative(subj, obj):
    """Build ``"<subj> не <verb>, что <transitive clause>."``.

    *obj* is accepted for signature compatibility but is not used: the
    embedded clause draws its own nouns. The verb is a random non-blank line
    of ``thoughtverbs.txt``.
    """
    with open('thoughtverbs.txt', 'r', encoding='utf-8') as f:
        # Skip blank entries (trailing newline) so no empty verb is chosen.
        thoughtverbs = [entry for entry in f.read().split('\n') if entry]
    return subj + ' не ' + random.choice(thoughtverbs) + ', что ' + trans_verb(nom_noun(), acc_noun()) + '.'
def positive():
    """Return a random affirmative sentence ending in a full stop.

    Both candidate sentences are generated first; one is then picked.
    """
    plain = trans_verb(nom_noun(), acc_noun()) + '.'
    thought = verb_of_thought(nom_noun())
    return random.choice([plain, thought])
def question():
    """Return a random question, always ending in ``?``.

    ``verb_of_thought`` ends its sentence with a full stop, so for the
    "почему" variant that stop is replaced by a question mark.
    """
    why_sentence = verb_of_thought(nom_noun())
    if why_sentence.endswith('.'):
        why_sentence = why_sentence[:-1]
    questions = [
        'зачем ' + trans_verb(nom_noun(), acc_noun()) + '?',
        'почему ' + why_sentence + '?',
    ]
    return random.choice(questions)
def negative():
    """Return a random negative sentence.

    Both candidate sentences are generated first; one is then picked.
    """
    thought = verb_of_thought_negative(nom_noun(), acc_noun())
    plain = trans_verb_negative(nom_noun(), acc_noun())
    return random.choice([thought, plain])
def conditional():
    """Return a random conditional sentence ``"если ..., то ..."``.

    The condition is a positive or negative sentence without its final stop;
    the consequence is a short clause with or without ``не``. Verbs are the
    non-blank lines of ``transverbs.txt``.
    """
    with open('transverbs.txt', 'r', encoding='utf-8') as f:
        # Skip blank entries (trailing newline) so no empty verb is chosen.
        transverbs = [entry for entry in f.read().split('\n') if entry]
    conditional_sentences = [
        'если ' + positive().strip('.') + ', то ' + nom_noun() + ' ' + random.choice(transverbs) + ' ' + acc_noun(),
        'если ' + positive().strip('.') + ', то ' + nom_noun() + ' не ' + random.choice(transverbs) + ' ' + acc_noun(),
        'если ' + negative().strip('.') + ', то ' + nom_noun() + ' ' + random.choice(transverbs) + ' ' + acc_noun(),
        'если ' + negative().strip('.') + ', то ' + nom_noun() + ' не ' + random.choice(transverbs) + ' ' + acc_noun(),
    ]
    return random.choice(conditional_sentences)
def imperative():
    """Return a random imperative sentence.

    Either ``пусть`` followed by a positive, negative or conditional
    sentence, or a random non-blank line of ``imperatives.txt`` followed by
    an accusative noun.
    """
    with open('imperatives.txt', 'r', encoding='utf-8') as f:
        # Skip blank entries (trailing newline) so no empty verb is chosen.
        imperatives = [entry for entry in f.read().split('\n') if entry]
    imperative_sentences = [
        'пусть ' + positive(),
        'пусть ' + negative(),
        'пусть ' + conditional(),
        random.choice(imperatives) + ' ' + acc_noun(),
    ]
    return random.choice(imperative_sentences)
def main():
    """Print one sentence of each of the five types, in random order."""
    sentences = [positive(), question(), negative(), conditional(), imperative()]
    random.shuffle(sentences)
    for sentence in sentences:
        print(sentence.capitalize())


if __name__ == '__main__':
    main()
# Read three numbers and report two facts about them:
# whether a % b == c, and whether c solves a*x + b = 0.
a = float(input("Введите первое число. "))
b = float(input("Введите второе число. "))
c = float(input("Введите третье число. "))
if not b:
    print("Делите на ноль сами!")
elif a % b == c:
    print("a даёт остаток c при делении на b")
else:
    print("a не даёт остаток c при делении на b")
if a * c + b == 0:
    print("c является решением линейного уравнения ax + b = 0")
else:
    print("c не является решением линейного уравнения ax + b = 0")
# Several equivalent ways to take the first element of every inner list.
list_list = [['l'], ['s'], ['d']]


def el_0(any_list):
    """Return the first element of *any_list*."""
    return any_list[0]


a = ' '.join(el_0(el) for el in list_list)
print(a)
b = [el_0(inner) for inner in list_list]
print(b)
b = ' '.join(map(el_0, list_list))
print(b)
c = ' '.join(map(lambda any_list: any_list[0], list_list))
print(c)
# Tell whether the entered integer is a perfect square.
import math

number = input("print any number")
value = int(number)
# math.isqrt works on exact integers, so large inputs are not distorted by
# float rounding; negative numbers are never perfect squares.
if value >= 0 and math.isqrt(value) ** 2 == value:
    print ("yes")
else:
    print ("no")
import codecs, re
def open_file(title):
    """Return the lower-cased tokens of the UTF-8 file *title*.

    The text is split on whitespace and the characters `` ,.?!-:;`` are
    stripped from both ends of every token. The file is closed on exit.
    """
    with open(title, 'r', encoding='utf-8') as a:
        words = [word.strip(' ,.?!-:;').lower() for word in a.read().split()]
    return words
def find_bigramm(words):
    """Print ``True`` if some bigram of *words* occurs more than twice, else ``False``.

    Adjacent word pairs are counted directly. Searching the joined text with
    the bigram as a regex would also match inside longer words and would
    fail on words containing regex metacharacters.
    """
    counts = {}
    for pair in zip(words, words[1:]):
        counts[pair] = counts.get(pair, 0) + 1
    print(any(count > 2 for count in counts.values()))
def main():
    """Run the repeated-bigram check on ``text.txt``."""
    find_bigramm(open_file('text.txt'))


if __name__ == "__main__":
    main()
import os, codecs
def open_file(title):
    """Return the lower-cased tokens of the UTF-8 file *title*.

    Tokens are split on whitespace and stripped of `` ,.?!-:;`` at both
    ends. The file is closed on exit.
    """
    with open(title, 'r', encoding='utf-8') as a:
        words = [word.strip(' ,.?!-:;').lower() for word in a.read().split()]
    return words
def count_word_frequency(words):
    """Return a dict mapping every item of *words* to its number of occurrences."""
    freq_dict = {}
    for word in words:
        freq_dict[word] = freq_dict.get(word, 0) + 1
    return freq_dict
def find_max_keys(dict_num_values, amount):
    """Return the keys whose value is among the *amount* largest distinct values.

    Distinct values are taken from the top down; once the value ``1`` is
    reached no lower value is considered. The search also stops when the
    values run out, so asking for more levels than exist (or passing an
    empty dict) no longer raises ``ValueError``. Keys are returned in dict
    iteration order.
    """
    remaining = list(dict_num_values.values())
    max_values = set()
    for _ in range(amount):
        if not remaining:
            break
        local_max = max(remaining)
        max_values.add(local_max)
        if local_max == 1:
            # Further rounds would only pick 1 again.
            break
        remaining = [x for x in remaining if x != local_max]
    return [key for key in dict_num_values if dict_num_values[key] in max_values]
def extract_words_from_txt_in_folder(path):
    """Return the words of every ``.txt`` file found under *path*.

    The tree is walked recursively and each file is read with ``open_file``.
    The extension is tested with ``endswith`` so that names containing
    further dots (``notes.v2.txt``) are not skipped.
    """
    words = []
    for root, dirs, files in os.walk(path):
        for f in files:
            if f.endswith('.txt'):
                words += open_file(os.path.join(root, f))
    return words
def main():
    """Print the most frequent words of the ``.txt`` files under the current folder."""
    all_words = extract_words_from_txt_in_folder('.')
    frequencies = count_word_frequency(all_words)
    print(find_max_keys(frequencies, 10))


if __name__ == "__main__":
    main()
import random
# Guess-the-word quiz: each line of words.txt is "<word> <hint>".
with open('words.txt','r', encoding = 'utf-8') as f:
    # Blank lines hold no word/hint pair and would break the split below.
    lines = [line.strip() for line in f.readlines() if line.strip()]
lenlines = len(lines)
random.shuffle(lines)
score = 0
for line in lines:
    # partition never raises, even when a line has a word but no hint.
    word, _, hint = line.partition(' ')
    response = input('Какое слово я загадала?\n'+
                     'Подсказка: '+hint+' ')
    if response == word:
        print('Правильно!')
        score += 1
    else:
        print('Нет, слово было', word)
with open('scores.txt', 'w', encoding = 'utf-8') as n:
    # An empty word list would otherwise divide by zero.
    percent = score/lenlines*100 if lenlines else 0
    n.write('Вот результат: ')
    n.write(str(percent)+'%')
import codecs
def open_file(file_name):
    """Return the cleaned, lower-cased words of the UTF-8 file *file_name*.

    Each whitespace-separated token is stripped of ``.,!?:;()'"`` and digits
    at both ends and lower-cased. Rebinding the loop variable, as the
    original did, left the list unchanged. Tokens that end up empty are
    dropped.
    """
    words = []
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            for word in line.split():
                word = word.strip('.,!?:;()\'\"1234567890').lower()
                if word:
                    words.append(word)
    return words
def bigramms(words):
    """Print and return whether any bigram of *words* occurs more than twice.

    Bigrams come from ``create_list``. The answer is ``True`` as soon as one
    bigram has a count above 2; overwriting the flag on every key, as the
    original did, reported only the last bigram inspected.
    """
    dic = {}
    for j in create_list(words):
        dic[j] = dic.get(j, 0) + 1
    answer = any(count > 2 for count in dic.values())
    print(answer)
    return answer
def create_list(words):
    """Return the bigrams of *words* as ``"first second"`` strings, in order."""
    return [words[i] + ' ' + words[i + 1] for i in range(len(words) - 1)]
# Script entry: load the words of text.txt and report whether a bigram repeats.
words = open_file('text.txt')
bigramms(words)
import re
def main():
    """Print the numbered names found in ``china space programm.txt``.

    Names look like ``«Слово-12»``. The list of all matches is extended with
    each distinct name stripped of its number and then printed. The number
    is matched with ``[0-9]+`` so that values containing 0 are found too.
    """
    with open ('china space programm.txt', 'r', encoding = 'utf-8') as f:
        text = f.read()
    reg = '«[А-ЯЁа-яё]+?-[0-9]+»'
    all_matches = re.findall(reg, text)
    pure_names = []
    for match in all_matches:
        bare = re.sub(r'-[0-9]+', '', match)
        if bare not in pure_names:
            pure_names.append(bare)
    all_matches += pure_names
    print(all_matches)


if __name__ == '__main__':
    main()
def opentext(fname):
    """Read a UTF-8 text file and return its lower-cased tokens.

    Tokens are split on whitespace and stripped of ``.,?*()«»`` at both
    ends; tokens that become empty are kept.
    """
    with open(fname, 'r', encoding='utf-8') as source:
        lowered = source.read().lower()
    return [token.strip('.,?*()«»') for token in lowered.split()]
def first_letter(letter, fname = 'text.txt'):
    """Return the tokens of *fname* whose first character equals *letter*.

    Tokens come from ``opentext``. Empty tokens, left over when a token was
    pure punctuation, are skipped instead of raising ``IndexError``.
    """
    return [form for form in opentext(fname) if form and form[0] == letter]
def questions():
    """Ask for a file, a letter and an integer, then filter the file's words.

    Returns the words starting with the letter that are longer than the
    given number of characters.
    """
    fname = input('Введите имя файла: ')
    letter = input('Введите букву: ')
    number = int(input('Введите целое число: '))
    return [w for w in first_letter(letter, fname) if len(w) > number]
def adjectives(fname):
    """Return adjective-like words of *fname*, each with the word that follows it.

    A word qualifies when it has more than two letters and ends in one of
    ``ой ый ий ая яя ое ее``. The Cyrillic ``о`` is written as ``\\u043e``
    because the original compared against what reads as a Latin ``o``,
    which never occurs in Russian text. Each hit is returned as
    ``"<adjective> <next word>"``, or alone if it is the last word.
    """
    endings = ('\u043eй', 'ый', 'ий', 'ая', 'яя', '\u043eе', 'ее')
    forms = opentext(fname)
    adj = []
    for i, form in enumerate(forms):
        if len(form) > 2 and form.endswith(endings):
            if i != len(forms) - 1:
                adj.append(form + ' ' + forms[i + 1])
            else:
                adj.append(form)
    return adj
# Part 1: ask the user five questions and store the answers line by line.
own_prompts = (
    'Как Вас зовут? ',
    'Сколько Вам лет? ',
    'Какой у Вас любимый цвет? ',
    'Какой у Вас любимый исполнитель? ',
    'Какая у Вас мечта? ',
)
with open('information.txt', 'w', encoding = 'utf-8') as n:
    for prompt in own_prompts:
        n.write(str(input(prompt)) + '\n')

# Part 2: quiz the user about the neighbour described in another file.
with open('information about Mary.txt','r', encoding = 'utf-8') as f:
    info = [line.strip() for line in f.readlines()]
# (question, start of the correction, end of the correction)
neighbour_checks = (
    ('Как Вашего соседа зовут? ', 'Нет, его зовут ', '.'),
    ('Сколько Вашему соседу лет? ', 'Нет, ему ', ' лет.'),
    ('Какой у Вашего соседа любимый цвет?', 'Нет, его любимый цвет - ', '.'),
    ('Какой у Вашего соседа любимый исполнитель?', 'Нет, его любимый исполнитель - ', '.'),
    ('Какая у Вашего соседа мечта?', 'Нет, его мечта - ', '.'),
)
for index, (prompt, prefix, suffix) in enumerate(neighbour_checks):
    response = input(prompt)
    if str(response) == info[index]:
        print('Правильно!')
    else:
        print(prefix + info[index] + suffix)
# Greet the user according to the country hinted at by the phone number.
number = input("What's your phone number")
silent = '+1' in number or number.endswith("2")
if not silent:
    if '+7' in number or number.startswith('8'):
        print("Как дела в России?")
    elif '+4' in number:
        print("Как дела в Англии?")
    else:
        print("Как дела в мире?")
import os, codecs
from math import log
def preprocess(text):
    """Lower-case *text*, remove punctuation and return its words.

    The punctuation class was cut off mid-literal in the original line; it
    is restored from the visible characters plus ``#``. Tabs and newlines
    are replaced by spaces; the original pattern ``'\\t\\n'`` only matched a
    tab directly followed by a newline.
    """
    import re  # the surrounding script does not import re at module level
    punct = r'[.,!«»?&@"$\[\]():;%#]'
    text_wo_punct = re.sub(punct, '', text.lower())
    text_wo_punct = re.sub(r'[\t\n]', ' ', text_wo_punct)
    words = text_wo_punct.strip().split()
    return words
def count_tf(word, text):
    """Return the share of items in *text* that equal *word*."""
    return text.count(word) / len(text)
def count_df(word, texts):
    """Return how many elements of *texts* contain *word*."""
    return sum(1 for text in texts if word in text)
def count_idf(word, texts):
    """Return ``len(texts) / df`` for *word*, or 0 when no text contains it."""
    df = count_df(word, texts)
    if df == 0:
        return 0
    return len(texts) / df
def count_tfidf(word, text, texts):
    """Return ``log10(tf) * log10(idf)`` for *word* in *text* against *texts*.

    NOTE(review): the usual TF-IDF is ``tf * log(idf)``; this multiplies two
    logarithms. Confirm that is intended.
    """
    return log(count_tf(word, text), 10) * log(count_idf(word, texts), 10)
def extract_textS_from_folder(path):
    """Return the preprocessed word list of every file under *path*.

    Files are read as UTF-8 and passed through ``preprocess``. ``t.read`` is
    now actually called; the original passed the bound method itself.
    """
    texts = []
    for root, dirs, files in os.walk(path):
        for f in files:
            with open(os.path.join(root, f), "r", encoding='utf-8') as t:
                content = t.read()
            texts.append(preprocess(content))
    return texts
def keywords(text, texts):
    """Return the six words of *text* with the lowest ``count_tfidf`` score.

    The result maps each selected word to its score. The sort key looks the
    score up in the dict; the original called the dict, which raises
    ``TypeError``.
    """
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    ranked = sorted(dic_tfidf, key=lambda x: dic_tfidf[x])
    # Six entries, matching the original "i > 5" cut-off.
    return {el: dic_tfidf[el] for el in ranked[:6]}
def main():
    """Print the key words of every text in the ``wikipedia`` folder."""
    # The loader is defined as extract_textS_from_folder (capital S); the
    # original call used a name that does not exist.
    texts = extract_textS_from_folder('wikipedia')
    for t in texts:
        kwords = keywords(t, texts)
        for key in kwords:
            print(key, kwords[key])


if __name__ == "__main__":
    main()
import os
import re
from math import log
def preprocessing(text):
    """Lower-case *text*, drop punctuation and digits, and return its words.

    The punctuation class was cut off mid-literal in the original line; it
    is restored from the visible characters plus ``#``. Whitespace is
    matched with ``\\s``; the original ``'\\n\\t\\s'`` only matched a
    newline, a tab and one more whitespace character in that exact order.
    """
    punct = r'[.,_!«»?&@"$/\\\[\]():;%#]'
    whitespace = r'\s'
    num = '[0-9]'
    text_wo_punct = re.sub(punct, '', text.lower())
    text_wo_punct = re.sub(whitespace, ' ', text_wo_punct)
    text_wo_punct = re.sub(num, '', text_wo_punct)
    words = text_wo_punct.strip().split()
    return words
def count_tf(word, text):
    """Return the share of items in *text* that equal *word*."""
    hits = sum(1 for w in text if w == word)
    return hits / len(text)
def count_df(word, texts):
    """Return how many elements of *texts* contain *word*."""
    return sum(1 for text in texts if word in text)
def count_idf(word, texts):
    """Return ``len(texts) / (1 + df)`` for *word*."""
    return len(texts) / (1 + count_df(word, texts))
def count_tfidf(word, text, texts):
    """Return ``log10(tf) * log10(idf)`` for *word* in *text* against *texts*.

    NOTE(review): the usual TF-IDF is ``tf * log(idf)``; this multiplies two
    logarithms. Confirm that is intended.
    """
    return log(count_tf(word, text), 10) * log(count_idf(word, texts), 10)
def keywords(text, texts):
    """Return the six words of *text* with the lowest ``count_tfidf`` score.

    The result maps each selected word to its score, in ascending order.
    """
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    ranked = sorted(dic_tfidf, key=dic_tfidf.get)
    return {el: dic_tfidf[el] for el in ranked[:6]}
def main():
    """Print the key words of every file in the ``wikipedia`` folder."""
    texts = {}
    for root, dirs, files in os.walk('wikipedia'):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
                texts[f] = preprocessing(t.read())
    raw_texts = list(texts.values())
    for title, words in texts.items():
        print('\nИзвлекаем ключевые слова для текста "{}"'.format(title.split('.')[0]))
        kwords = keywords(words, raw_texts)
        for key, score in kwords.items():
            print (key, score)


if __name__ == '__main__':
    main()
import re
import os
from math import log
def open_words(fname):
    """Return the lower-cased words of the UTF-8 file *fname* without punctuation.

    The punctuation class was cut off mid-literal in the original line; it
    is restored from the visible characters plus ``#``. Newlines and tabs
    become spaces before splitting.
    """
    with open (fname, 'r', encoding = 'utf-8') as f:
        text = f.read()
    text = text.lower()
    punct = '[.,?!|:;*№"\'—@#]'
    text = re.sub(punct, '', text)
    text = re.sub('[\n\t]', ' ', text)
    return text.split()
def make_freq(arr):
    """Return a dict mapping every element of *arr* to its number of occurrences."""
    d = {}
    for el in arr:
        d[el] = d.get(el, 0) + 1
    return d
def make_bigrams(arr):
    """Return the adjacent pairs of *arr* joined by a space, in order."""
    return [first + ' ' + second for first, second in zip(arr, arr[1:])]
def count_pmi(x, y):
    """Return the pointwise mutual information of the bigram ``"x y"``.

    Reads the module-level names ``word_freq``, ``words``, ``bigrams_freq``
    and ``bigrams``. Unknown words or bigrams count as probability 0, and
    the result is 0 whenever any of the three probabilities is 0. That
    covers both the division by zero and ``log(0)``, which raises
    ``ValueError`` and was not handled before.
    """
    try:
        p_x = word_freq[x]/len(words)
    except KeyError:
        p_x = 0
    try:
        # Probability of y; the original looked up x a second time.
        p_y = word_freq[y]/len(words)
    except KeyError:
        p_y = 0
    try:
        p_xy = bigrams_freq[x + ' ' + y]/len(bigrams)
    except KeyError:
        p_xy = 0
    if p_x == 0 or p_y == 0 or p_xy == 0:
        return 0
    return log(p_xy/(p_x*p_y))
def calculate_pmi():
    """Return a dict mapping every entry of the global ``bigrams`` to its PMI."""
    pmis = {}
    for bigr in bigrams:
        first, second = bigr.split()
        pmis[bigr] = count_pmi(first, second)
    return pmis
def calculate_pmi_cats(word, cathegory):
    """Return the PMI between *word* and one of three corpus categories.

    *cathegory* is ``'anek'``, ``'izvest'`` or ``'teh'``. The function reads
    the module-level frequency dicts and word lists for the whole corpus and
    for the chosen category. ``KeyError`` propagates when *word* is missing
    from a frequency dict. An unknown category raises ``ValueError`` instead
    of failing later on an unbound local.
    """
    p_word = freq_all[word]/len(words_all)
    p_cat = 1/3
    if cathegory == 'anek':
        d = freq_anek
        w = len(corpus_anek_words)
    elif cathegory == 'izvest':
        d = freq_izvest
        w = len(corpus_izvest_words)
    elif cathegory == 'teh':
        d = freq_teh
        w = len(corpus_teh_words)
    else:
        raise ValueError('unknown cathegory: {!r}'.format(cathegory))
    p_word_cat = d[word]/w
    return log(p_word_cat/(p_word*p_cat))
def main():
    """Assign every corpus word to the category with which it has the highest PMI.

    Words are read from the sub-folders of ``texts`` whose path contains
    ``anekdots``, ``teh_mol`` or ``izvest``. Words missing from any of the
    three corpora are skipped. The resulting ``{word: category}`` dict is
    printed.
    """
    # calculate_pmi_cats reads these names at module level, so publish them.
    global corpus_anek_words, corpus_izvest_words, corpus_teh_words
    global freq_anek, freq_izvest, freq_teh, freq_all, words_all
    corpus_anek_words = []
    corpus_izvest_words = []
    corpus_teh_words = []
    for root, dirs, files in os.walk('texts'):
        if 'anekdots' in root:
            for f in files:
                corpus_anek_words += open_words(os.path.join(root, f))
        if 'teh_mol' in root:
            for f in files:
                corpus_teh_words += open_words(os.path.join(root, f))
        if 'izvest' in root:
            for f in files:
                corpus_izvest_words += open_words(os.path.join(root, f))
    words_all = corpus_anek_words + corpus_teh_words + corpus_izvest_words
    freq_anek = make_freq(corpus_anek_words)
    freq_izvest = make_freq(corpus_izvest_words)
    freq_teh = make_freq(corpus_teh_words)
    freq_all = make_freq(words_all)
    words_cathegory_dict = {}
    # NOTE(review): the original kept a counter with an "i < 100" test but
    # reset it on every iteration, so every word was processed. That actual
    # behaviour is kept; confirm whether a cap was intended.
    for w in words_all:
        try:
            pmis = {cat: calculate_pmi_cats(w, cat) for cat in ('anek', 'izvest', 'teh')}
        except KeyError:
            # The word does not occur in at least one of the corpora.
            continue
        words_cathegory_dict[w] = max(pmis, key=pmis.get)
    print(words_cathegory_dict)


if __name__ == '__main__':
    main()
import shutil
import os
# Create a chain of nested folders named after the words of a sentence.
name = input('Print any sentence. ')
words = name.split()
if words:
    path = os.path.join(*words)
    os.makedirs(path)
else:
    # Nothing was typed: words[0] would raise IndexError.
    print('No words given, nothing to create.')
import shutil
import os
# Create folders 1..num; folder k gets the empty files 1.txt .. k.txt.
num = int(input('Print any natural number. '))
for folder_no in range(1, num + 1):
    name = str(folder_no)
    os.makedirs(name)
    for file_no in range(1, folder_no + 1):
        filename = os.path.join(name, str(file_no) + '.txt')
        with open(filename, 'w', encoding='utf-8') as f:
            f.write('')
import os
import shutil
# Print the names of the regular files (not folders) in the current directory.
filelist = [f for f in os.listdir() if os.path.isfile(f)]
print(filelist)
import os
import shutil
# Walk-through of common os / shutil operations (Windows-style paths).
texts_dir = r'C:\My\HSE\programming\HSE_programming\HSE_programming\CWs\CW13\texts'
path = os.path.abspath('.')
path2 = os.getcwd()
universalpath = os.path.join('texts', '1.txt')
# Raw string: in a normal literal '\1' is the control character chr(1).
exists = os.path.exists(r'texts\1.txt')
exists2 = os.path.exists(os.path.join('texts', '1.txt'))
filelist = os.listdir(texts_dir)
s = 'Hello! '
i = 1
for f in filelist:
    if f.endswith('.txt'):
        # listdir returns bare names, so join them back onto the listed folder.
        with open(os.path.join(texts_dir, f), 'a', encoding = 'utf-8') as w:
            # i counts the files written; the original multiplied by the
            # literal 1, which left the counter unused.
            w.write(s*i)
        i += 1
texts = [f for f in os.listdir(texts_dir) if f.endswith('.txt')]
if not os.path.exists('ab'):
    os.mkdir('ab')
if not os.path.exists(r'a\long\long\long\long\path'):
    os.makedirs(r'a\long\long\long\long\path')
if os.path.exists('ab') and not os.path.exists('abc'):
    os.rename('ab', 'abc')
if os.path.exists(r'a\long\long\long') and not os.path.exists(r'a\long\long\longer'):
    os.rename(r'a\long\long\long', r'a\long\long\longer')
isfile = os.path.isfile(r'texts\1.txt')
isdir = os.path.isdir(r'a\long\long')
print(os.listdir())
shutil.copy(r'texts\1.txt', r'newcorpus')
shutil.copytree(r'texts', r'corpus')
shutil.move(r'texts\2.txt', r'newcorpus')
os.remove(r'corpus\2.txt')
shutil.rmtree('newcorpus')
shutil.rmtree('a')
shutil.rmtree('abc')
import os
import shutil
# Count the files of a directory by extension.
directory = input('Print any path working in your OS. ')
if not os.path.isdir(directory):
    # Fall back to the current directory when the path is unusable.
    directory = os.getcwd()
# List the chosen directory itself and test each entry with its full path.
filelist = [f for f in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, f))]
extdict = {}
for f in filelist:
    # splitext copes with names that have no dot or several dots.
    ext = os.path.splitext(f)[1].lstrip('.')
    if ext:
        extdict[ext] = extdict.get(ext, 0) + 1
extlist = list(extdict)
print(extdict)
import re
def main():
    """Extract the teachers entry from ``hse.html`` and append it to a log file.

    The page is searched with ``card_reg`` and the match with ``t_reg``.
    Either the captured text or a failure message is appended to
    ``data about teachers.txt``; failure messages are also printed.
    """
    with open ('hse.html', 'r', encoding = 'utf-8') as f:
        text = f.read()
    card_reg = ''
    t_reg = 'Преподаватели(?:.|\n)*?(.+?)<'
    card_match = re.search(card_reg, text)
    if card_match:
        card = card_match.group()
        prof_match = re.search(t_reg, card)
        if prof_match:
            report = prof_match.group(1)
        else:
            report = 'No data about the nuber of professors found!'
            print(report)
    else:
        report = 'No card found in this article!'
        print(report)
    with open ('data about teachers.txt', 'a', encoding = 'utf-8') as f:
        f.write(report)


if __name__ == '__main__':
    main()
import os
import re
def tagsaway(sentence):
    """Join the items of *sentence* with ``<...>`` tags removed.

    Every item is followed by a single space, including the last one.
    """
    return ''.join(re.sub(u'<.+?>', u'', word) + ' ' for word in sentence)
def get_bigramms(text):
    """Collect genitive adjective + noun pairs from one tagged sentence.

    Returns a list of ``[adjective, noun, plain sentence]`` triples.

    NOTE(review): in this copy the split separator is an empty string, which
    makes ``str.split`` raise ``ValueError``, and both capture patterns are
    ``'(.+?)'``. They look like markup that was lost and must be restored
    before the function can run.
    """
    bi = []
    text = text.split('')
    for i, word in enumerate(text):
        # Both markers must be present; "'a' and 'b' in w" only tests 'b'.
        if 'gr="A=' in word and 'gen' in word:
            if i+1 < len(text):
                w = text[i+1]
                if 'gr="S,' in w and 'gen' in w:
                    result1 = re.search('(.+?)', word)
                    result2 = re.search('(.+?)', w)
                    bi.append([result1.group(1), result2.group(1), tagsaway(text)])
    return bi
def newfile(arr):
    """Write the triples of *arr* to ``bigramms.txt``, tab-separated, one per line.

    The file is overwritten on every call and closed by the ``with`` block;
    the original ``f.close`` lacked parentheses and never ran.
    """
    with open('bigramms.txt', 'w', encoding = 'utf8') as f:
        for i in arr:
            f.write(i[0] + '\t' + i[1] + '\t' + i[2] + '\n')
def filework():
    """Process every file of the ``news`` folder sentence by sentence.

    Each piece produced by the split is passed through ``get_bigramms`` and
    the result handed to ``newfile``.
    """
    folder = 'news'
    for file in os.listdir(folder):
        with open(os.path.join(folder, file)) as handle:
            sentences = handle.read().split('')
            for se in sentences:
                newfile(get_bigramms(se))
def main ():
    """Entry point: delegate to ``filework``."""
    filework()
# Runs on import as well as when executed as a script.
main()
import os
import re
def get_author (text):
    """Return the ``content`` value of the first ``name="author"`` chunk.

    *text* is an iterable of strings. Chunks that mention the author but
    carry no ``content="..."`` are skipped rather than raising
    ``AttributeError``. ``None`` is returned when nothing is found.
    """
    for word in text:
        if 'name="author"' in word:
            result = re.search('content="(.+?)"', word)
            if result:
                return result.group(1)
    return None
def get_day (text):
    """Return the ``content`` value of the first ``name="created"`` chunk.

    *text* is an iterable of strings. Chunks without ``content="..."`` are
    skipped rather than raising ``AttributeError``. ``None`` is returned
    when nothing is found.
    """
    for word in text:
        if 'name="created"' in word:
            result = re.search('content="(.+?)"', word)
            if result:
                return result.group(1)
    return None
def file_inf():
    """Return ``[file name, author, creation date]`` for every file in ``news``.

    Each file is split on ``<`` and the pieces are searched by
    ``get_author`` and ``get_day``.
    """
    folder = 'news'
    rows = []
    for file in os.listdir(folder):
        with open(os.path.join(folder, file)) as handle:
            chunks = handle.read().split('<')
            rows.append([file, get_author(chunks), get_day(chunks)])
    return rows
def newfile(arr):
    """Write *arr* to ``files_info.csv`` as ``;``-separated rows under a header.

    Each row is ``[file name, author, date]``. Missing values (``None``) are
    written as empty fields instead of raising ``TypeError``. The file is
    closed by the ``with`` block; the original ``f.close`` never ran.
    """
    with open('files_info.csv', 'w', encoding = 'utf8') as f:
        f.write('Название файла;Автор;Дата создания текста\n')
        for i in arr:
            fields = ['' if value is None else value for value in i[:3]]
            f.write(';'.join(fields) + '\n')
def main ():
    """Entry point: collect per-file info and write it out with ``newfile``."""
    ff = file_inf()
    newfile(ff)
# Runs on import as well as when executed as a script.
main()
import os
import re
def files():
    """Return ``{file name: number of pattern matches}`` for the ``news`` folder.

    NOTE(review): the pattern is an empty string in this copy, so the count
    is the number of empty matches. The real pattern was presumably lost.
    """
    folder = 'news'
    counts = {}
    for file in os.listdir(folder):
        with open(os.path.join(folder, file)) as handle:
            counts[file] = len(re.findall('', handle.read()))
    return counts
def newfile(dic):
    """Write ``key<TAB>value`` lines for *dic* to ``words_in_files.txt``.

    The file is closed by the ``with`` block; the original ``f.close``
    lacked parentheses and never ran.
    """
    with open('words_in_files.txt', 'w', encoding = 'utf8') as f:
        for k in dic:
            f.write(k + '\t' + str(dic[k]) + '\n')
def main ():
    """Entry point: count matches per file and write the table with ``newfile``."""
    ff = files()
    newfile(ff)
# Runs on import as well as when executed as a script.
main()
import os
def findanddel (folder):
    """Delete everything inside *folder*, leaving the folder itself in place.

    The tree is walked bottom-up so that each directory is already empty
    when it is removed. Directories are removed with ``os.rmdir``;
    ``os.remove`` only works on files.
    """
    for root, dirs, files in os.walk(folder, topdown = False):
        for f in files:
            os.remove(os.path.join(root, f))
        for d in dirs:
            os.rmdir(os.path.join(root, d))
def main ():
    """Read a folder path from standard input and empty that folder."""
    folder = input()
    findanddel (folder)
# Runs on import as well as when executed as a script.
main ()
import os
def draw ():
    """Print the directory tree below the current folder.

    Every entry is indented by one tab per nesting level; directories are
    prefixed with ``--``. Depth is measured with ``os.sep`` so the
    indentation also works where the separator is not a backslash.
    """
    for root, dirs, files in os.walk ('.'):
        indent = '\t' * root.count(os.sep)
        for d in dirs:
            print (indent, '--', d)
        for f in files:
            print (indent, f)
def main ():
    """Entry point: print the directory tree with ``draw``."""
    draw()
# Runs on import as well as when executed as a script.
main ()
def pointsaway (file):
    """Split the text *file* into lower-cased words without edge punctuation.

    The strip set was cut off mid-literal in the original line. It is
    restored as the visible characters plus the remaining ASCII punctuation
    and guillemets.
    """
    import string
    strip_chars = '.,?!()*&^%$' + string.punctuation + '«»'
    return [word.strip(strip_chars).lower() for word in file.split()]
def words (file):
    """Return a dict mapping every item of *file* to its number of occurrences."""
    slova = {}
    for word in file:
        slova[word] = slova.get(word, 0) + 1
    return slova
def creation (dic):
    """Write *dic* to ``file.tsv`` as ``key<TAB>value`` lines sorted by key."""
    with open ('file.tsv', 'w', encoding = 'utf8') as f:
        for key in sorted(dic):
            f.write(key + '\t' + str(dic[key]) + '\n')
def main ():
    """Build a word-frequency table for ``file.txt`` and save it with ``creation``."""
    with open ('file.txt', 'r', encoding = 'utf8') as f:
        file = f.read()
    creation (words (pointsaway (file)))
main ()
def pointsaway (file):
    """Split the text *file* into lower-cased words without edge punctuation.

    The strip set was cut off mid-literal in the original line. It is
    restored as the visible characters plus the remaining ASCII punctuation
    and guillemets.
    """
    import string
    strip_chars = '.,?!()*&^%$' + string.punctuation + '«»'
    return [word.strip(strip_chars).lower() for word in file.split()]
def creation (text):
    """Write every distinct word of *text* with its last index to ``new.txt``.

    Lines are ``word<TAB>index``, sorted by word.
    """
    last_index = {}
    for position, item in enumerate(text):
        last_index[item] = position
    with open ('new.txt', 'w', encoding = 'utf8') as f:
        for item in sorted(last_index):
            f.write('{}\t{}\n'.format(item, str(last_index[item])))
def main ():
    """Index the words of ``file.txt`` and save the table with ``creation``."""
    with open ('file.txt', 'r', encoding = 'utf8') as f:
        file = f.read()
    creation (pointsaway (file))
main ()
import re
def get_word (word):
    """Return the first run of word characters in *word*, or ``None`` if there is none."""
    result = re.search(r'.*?(\w+)', word)
    return result.group(1) if result else None
def find_ins (text):
    """Return ``{index: line}`` for lines containing both ``gr="S`` and ``ins``."""
    return {i: word for i, word in enumerate(text)
            if 'gr="S' in word and 'ins' in word}
def newfile (words, text):
    """Write a context table for the selected lines to ``ins.txt``.

    *words* maps line indexes of *text* to the selected lines. For each of
    them one output line is written: up to three words found before it, the
    word itself, and up to three words found after it, tab-separated. Words
    are extracted with ``get_word``; lines without a word are skipped.

    Compared with the original, the search stops at the edges of *text*
    instead of wrapping around or raising ``IndexError``. Each entry starts
    from an empty buffer and ends with a newline, and the file is actually
    closed.
    """
    def context(start, step):
        """Collect up to three words walking from *start* in direction *step*."""
        found = []
        idx = start + step
        while len(found) < 3 and 0 <= idx < len(text):
            candidate = get_word(text[idx])
            if candidate is not None:
                found.append(candidate)
            idx += step
        return found

    with open ('ins.txt', 'w', encoding = 'utf8') as f:
        for k in words:
            left = ' '.join(reversed(context(k, -1)))
            right = ' '.join(context(k, 1))
            centre = get_word(words[k]) or ''
            f.write(left + '\t' + centre + '\t' + right + '\n')
def main ():
    """Read the tagged corpus and write the context table with ``newfile``."""
    with open ('/home/woods/Загрузки/text.xml', 'r', encoding = 'utf8') as f:
        text = f.read().split('\n')
    newfile (find_ins(text), text)
main ()
import re
def find_and_count (file):
    """Count the tags that follow ``gr="`` in the lines of *file*.

    Every line is split on ``<``; for each piece the first run of word
    characters after ``gr="`` is printed and counted. Returns
    ``{tag: count}``.
    """
    pos = {}
    for line in file:
        for part in line.split('<'):
            result = re.search(r'.*?gr="(\w+)', part)
            if not result:
                continue
            tag = result.group(1)
            print (tag)
            pos[tag] = pos.get(tag, 0) + 1
    return pos
def newfile (dic):
    """Write ``key<TAB>value`` lines for *dic* to ``pos.txt``.

    The file is closed by the ``with`` block; the original ``f.close``
    lacked parentheses and never ran.
    """
    with open('pos.txt', 'w', encoding = 'utf8') as f:
        f.write(''.join(k + '\t' + str(dic[k]) + '\n' for k in dic))
def main ():
    """Count the grammar tags of the corpus file and save them with ``newfile``."""
    with open ('/home/woods/Загрузки/text.xml', 'r', encoding = 'utf8') as f:
        text = f.read().split('\n')
    newfile (find_and_count(text))
main ()
import re
def find_w (file):
    """Return the number of matches of the pattern in *file*.

    NOTE(review): the pattern is an empty string in this copy, so the result
    is the number of empty matches. The real pattern was presumably lost.
    """
    return len(re.findall('', file))
def find_ana (file):
    """Unfinished helper that is meant to collect analysis types.

    NOTE(review): the body never uses *file* and refers to ``line``, ``a``
    and ``types``, none of which is defined in this function. Unless
    module-level names with those spellings exist, the first statement
    raises ``NameError``. The pattern ``'(.+?)'`` also has a single group
    while ``group(2)`` is requested.
    """
    anas = re.findall('(.+?)', line)
    if a:
        if a.group(2) not in types:
            types[a.group(2)] = 0
    return types
def countthem (file, types):
    """Store, for every key of *types*, how many lines of *file* contain it in double quotes.

    *types* is updated in place and also returned.
    """
    for el in list(types):
        quoted = '"' + el + '"'
        types[el] = len([line for line in file if quoted in line])
    return types
def newfile (types):
    """Write ``key-value`` lines for *types* to ``adj.txt``."""
    lines = [key + '-' + str(types[key]) + '\n' for key in types]
    with open('adj.txt', 'w', encoding = 'utf8') as f:
        f.write(''.join(lines))
def main():
    """Entry point: read the corpus, collect types, count them and write the result.

    NOTE(review): ``filework`` and ``findthem`` are not defined in the
    script section that contains this function. Confirm they exist before
    this call runs.
    """
    text = filework()
    dic = findthem(text)
    dic = countthem (text, dic)
    newfile(dic)
# Runs on import as well as when executed as a script.
main()
import random
# Shared dictionary handle: the word-picking functions below iterate over it
# and rewind it with seek(0, 0); it is closed at the end of the script.
file=open ('file_6.6.txt', 'r')
def noun():
    """Return a random first field among the dictionary lines containing ``' n '``.

    Reads the shared module-level ``file`` handle and rewinds it afterwards.
    """
    nouns = [line.split(' ')[0] for line in file if ' n ' in line]
    file.seek(0, 0)
    return random.choice(nouns)
def pronoun():
    """Return a random first field among the dictionary lines containing ``' pn '``.

    Reads the shared module-level ``file`` handle and rewinds it afterwards.
    """
    pronouns = [line.split(' ')[0] for line in file if ' pn ' in line]
    file.seek(0, 0)
    return random.choice(pronouns)
def verb():
    """Return a random first field among the dictionary lines containing ``' v '``.

    Reads the shared module-level ``file`` handle and rewinds it afterwards.
    """
    verbs = [line.split(' ')[0] for line in file if ' v ' in line]
    file.seek(0, 0)
    return random.choice(verbs)
def adjective ():
    """Return a random first field among the dictionary lines containing ``' adj '``.

    Reads the shared module-level ``file`` handle and rewinds it afterwards.
    """
    adjectives = [line.split(' ')[0] for line in file if ' adj ' in line]
    file.seek(0, 0)
    return random.choice(adjectives)
def suborob (n, adj, pn):
    """Return a random subject/object phrase.

    With equal chance the result is the bare pronoun or a noun phrase; the
    noun phrase is ``"n adj"`` or ``"n pn"``, again with equal chance.
    """
    if random.randint(0, 1) == 0:
        return pn
    modifier = adj if random.randint(0, 1) == 0 else pn
    return n + ' ' + modifier
def declarative (subj, v, obj):
    """Return ``"<Subj> <v> <obj>."`` with the subject capitalised."""
    return '{} {} {}.'.format(subj.capitalize(), v, obj)
def question (subj, v):
    """Return ``"Apa <subj> <v>?"`` or ``"Siapa <v>?"`` with equal chance."""
    if random.randint (0, 1) != 0:
        return 'Siapa ' + v + '?'
    return 'Apa ' + subj + ' ' + v + '?'
def negative (subj, v, obj):
    """Return ``"<Subj> tidak <v> <obj>."`` or ``"<Subj> bukan <obj>."`` with equal chance."""
    if random.randint(0, 1) != 0:
        return subj.capitalize() + ' bukan ' + obj + '.'
    return subj.capitalize() + ' tidak ' + v + ' ' + obj + '.'
def imperative (v, obj):
    """Return ``"<V> <obj>!"`` or ``"Jangan <v> <obj>!"`` with equal chance."""
    if random.randint(0, 1) != 0:
        return 'Jangan ' + v + ' ' + obj + '!'
    return v.capitalize() + ' ' + obj + '!'
def conditional (subj1, v1, obj1, subj2, v2, obj2):
    """Return ``"Kalau <clause 1>, <clause 2>."`` built from the six parts."""
    first = ' '.join((subj1, v1, obj1))
    second = ' '.join((subj2, v2, obj2))
    return 'Kalau ' + first + ', ' + second + '.'
def sequence ():
    """Return the numbers 1..5 in random order as a list.

    ``random.sample`` draws without replacement, which replaces the retry
    loop that re-drew until an unused number came up.
    """
    a = [1, 2, 3, 4, 5]
    return random.sample(a, len(a))
def text():
    """Print five sentences, one of each type, in the order given by ``sequence``."""
    def phrase():
        # A fresh subject/object phrase built from newly drawn words.
        return suborob(noun(), adjective(), pronoun())

    for kind in sequence():
        if kind == 1:
            print(declarative(phrase(), verb(), phrase()))
        elif kind == 2:
            print (question(phrase(), verb()))
        elif kind == 3:
            print (negative(phrase(), verb(), phrase()))
        elif kind == 4:
            print (imperative(verb(), phrase()))
        else:
            print (conditional(phrase(), verb(), phrase(), phrase(), verb(), phrase()))
text()
file.close()
import re
def search (text):
    """Return group 3 of the first match of the pattern in *text*, or ``None``.

    ``None`` is returned when nothing matches; the original then failed on
    an unbound local.

    NOTE(review): the pattern as it stands defines a single group, so
    ``group(3)`` raises ``IndexError`` whenever it does match. The pattern
    looks like markup that was lost and needs to be restored.
    """
    otr = re.search ('\nОтряд: | \n(.+?) | \n ', text)
    if not otr:
        return None
    return otr.group(3)
def main ():
    """Print what ``search`` finds in ``file_10.6.html``."""
    with open('file_10.6.html', 'r', encoding = 'utf8') as f:
        file = f.read()
    print (search (file))
main()
def pointsaway (file):
file = file.replace('?!', '.')
file = file.split('.')
for i, word in enumerate (file):
file[i] = file[i].replace('.,?!()*&^%$
file[i] = file[i].replace('-- ', ' ')
file[i] = file[i].lower()
return file
def tenplus (text):
    """Print the average word length of every sentence with more than ten words.

    *text* is an iterable of sentence strings. Words are stripped of edge
    punctuation before measuring. The strip set was cut off mid-literal in
    the original line; it is restored as the visible characters plus the
    remaining ASCII punctuation and guillemets.
    """
    import string
    strip_chars = '.,?!()*&^%$' + string.punctuation + '«»'
    for sentence in text:
        lengths = [len(word.strip(strip_chars)) for word in sentence.split()]
        n = len(lengths)
        if n > 10:
            s = sum(lengths)
            print ("Это предложение со словами длины %s"%(str(round(s/n, 1))))
def main ():
    """Report the long sentences of ``file_12.6.txt`` via ``tenplus``."""
    with open ("file_12.6.txt", "r", encoding = "utf8") as f:
        file = f.read()
    tenplus (pointsaway (file))
main ()
import random
def intothedic (file):
    """Build ``{first field: second field}`` from ``;``-separated lines.

    Newlines are stripped from every field. Lines with fewer than two
    fields, such as a blank last line, are skipped instead of raising
    ``IndexError``.
    """
    dic = {}
    for line in file:
        fields = [field.strip('\n') for field in line.split(';')]
        if len(fields) < 2:
            continue
        dic[fields[0]] = fields[1]
    return dic
def youchoose (dic):
    """Return a random key of *dic*."""
    return random.choice(list(dic))
def thegameison (noun, dic):
    """Play one round: show the hint for *noun* and give three guesses.

    The hint is ``dic[noun]``, looked up directly; a missing key raises
    ``KeyError``. Guesses are read with ``input()``. The round ends on the
    first correct guess or after the third miss.
    """
    hint = dic[noun]
    print (hint, '...')
    for i in range (3):
        if input() == noun:
            print ('Победа!')
            break
        if i == 0:
            print ('Ещё 2 попытки')
        elif i == 1:
            print ('Ещё 1 попытка')
        else:
            print ('GAME OVER')
# Load the word;hint table, pick a word at random and play one round.
with open('file_8.6.csv', 'r', encoding = 'utf8') as f:
    file = f.readlines()
words = intothedic(file)
word = youchoose(words)
thegameison(word, words)
import re
def pointsaway (file):
    """Split the text *file* into lower-cased words without edge punctuation.

    The strip set was cut off mid-literal in the original line. It is
    restored as the visible characters plus the remaining ASCII punctuation
    and guillemets.
    """
    import string
    strip_chars = '.,?!()*&^%$' + string.punctuation + '«»'
    return [word.strip(strip_chars).lower() for word in file.split()]
def findverbs (file):
    """Return the distinct items of *file* that match the verb pattern.

    The result keeps first-occurrence order.
    """
    pattern = 'загру(з(и.*|ят.*)|ж(у.*|ен.*))'
    verbs = []
    for word in file:
        if re.search (pattern, word) is not None and word not in verbs:
            verbs.append(word)
    return verbs
# Print the distinct matching verb forms found in file_9.6.txt.
with open ('file_9.6.txt', 'r', encoding = 'utf8') as f:
    file = f.read()
file = pointsaway(file)
verbs = findverbs(file)
print (verbs)
def filework ():
    """Return the lines of ``corpus.txt`` (UTF-8), line endings included."""
    with open('corpus.txt', 'r', encoding = 'utf8') as f:
        return f.readlines()
def newfile (text):
    """Write the number of items in *text* to ``lines.txt``.

    The file is closed by the ``with`` block; the original ``f.close``
    lacked parentheses and never ran.
    """
    with open('lines.txt', 'w', encoding = 'utf8') as f:
        f.write(str(len(text)))
def main():
    """Entry point: read the corpus with ``filework`` and pass it to ``newfile``."""
    text = filework()
    newfile(text)
# Runs on import as well as when executed as a script.
main()
import os
def names(array):
    """Return the entries of *array* that are paths of existing regular files."""
    return [name for name in array if os.path.isfile(name)]
def haspoints(array):
    """Print how many names in *array* contain at least one of ``,!_-``."""
    points = ',!_-'
    su = sum(1 for name in array if any(c in points for c in name))
    print ("Знаки препинания есть в названии такого количества файлов: ", su)
def main():
    """List the current folder and report how many file names contain punctuation."""
    files = names (os.listdir('.'))
    print (os.listdir('.'))
    haspoints (files)
main()
# Print the multiplication table (1..10) for the entered number.
a = input ('Введите число ')
for factor in range (1, 11):
    product = factor * int(a)
    print (factor, '*', a, '=', product)
# Read seven integers and draw a row of 'x' for every positive one; each
# row, empty or not, is followed by a blank line.
xs = [int(input()) for _ in range (7)]
for value in xs:
    if value > 0:
        print ('x' * value, end='')
    print ('\n')
# Print the word again and again, each time without its first letter.
word = input()
for start in range(1, len(word) + 1):
    print (word[start:])
def pointsaway (file):
    """Split the text *file* into lower-cased words without edge punctuation.

    The strip set was cut off mid-literal in the original line. It is
    restored as the visible characters plus the remaining ASCII punctuation
    and guillemets.
    """
    import string
    strip_chars = '.,?!()*&^%$' + string.punctuation + '«»'
    return [word.strip(strip_chars).lower() for word in file.split()]
def findomni (file):
    """Return ``[word, word without the prefix]`` for every word starting with ``omni``.

    Only the leading ``omni`` is removed; ``str.replace`` would also delete
    later occurrences inside the word.
    """
    prefix = 'omni'
    return [[word, word[len(prefix):]] for word in file if word.startswith(prefix)]
def findwords (array, file):
    """Print, for every pair in *array*, how often each member occurs in *file*.

    Output format: ``<first> <count> - <second> <count>``.
    """
    for pair in array:
        n = sum(1 for word in file if word == pair[0])
        m = sum(1 for word in file if word == pair[1])
        print (pair[0], n, '-', pair[1], m)
def main ():
    """Ask for a file name and compare the counts of ``omni-`` words and their stems."""
    name = input('Введите имя файла ')
    with open (name, "r") as f:
        file = f.read()
    file = pointsaway (file)
    findwords(findomni(file), file)
main()
import re
def filework():
    """Return the lines of ``corpus.txt`` (UTF-8), line endings included."""
    with open('corpus.txt', 'r', encoding = 'utf8') as f:
        return f.readlines()
def findthem (file):
    """Collect the distinct values captured from the lines of *file*.

    Returns a dict whose keys are the captured values, each mapped to 0.

    NOTE(review): the pattern ``'(.+?)'`` defines one group, so
    ``a.group(2)`` raises ``IndexError`` for any line that matches. The
    pattern presumably lost its markup and needs to be restored.
    """
    types = {}
    for line in file:
        a = re.search('(.+?)', line)
        if a:
            if a.group(2) not in types:
                types[a.group(2)] = 0
    return types
def countthem (file, types):
    """Store, for every key of *types*, how many lines of *file* contain it in double quotes.

    *types* is updated in place and also returned.
    """
    for el in list(types):
        quoted = '"' + el + '"'
        types[el] = len([line for line in file if quoted in line])
    return types
def newfile (types):
    """Write the keys of *types*, one per line, to ``types.txt``."""
    with open('types.txt', 'w', encoding = 'utf8') as f:
        f.write(''.join(key + '\n' for key in types))
def main():
    """Entry point: read the corpus, collect the types, count them and write them out."""
    text = filework()
    dic = findthem(text)
    dic = countthem (text, dic)
    newfile(dic)
# Runs on import as well as when executed as a script.
main()
# Print the percentage of words in file_5.6.txt that start with a capital
# Cyrillic letter.
capital='АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
s=0
cap=0
with open('file_5.6.txt', 'r', encoding='utf8') as f:
    file=f.readlines()
for line in file:
    # split() never yields empty tokens, so indexing [0] is always safe.
    for token in line.split():
        if token[0] in capital:
            cap+=1
        s+=1
# An empty file has no words; report 0 instead of dividing by zero.
print ((cap/s)*100 if s else 0)
import xml.etree.ElementTree as a
from os import walk
def sent(filename):
    """Return the number of ``se`` elements in ``./news/<filename>``.

    The path is built from the *filename* argument; the original ignored it
    and read a global variable named ``file`` instead.
    """
    tree = a.parse('./news/'+filename)
    root = tree.getroot()
    return len(root.findall('.//se'))
def move(res,filename):
    """Write every item of *res* on its own line to *filename*.

    The file is closed when the block ends, so the data is flushed before
    the function returns.
    """
    with open(filename, 'w') as res_file:
        for item in res:
            res_file.write(item+'\n')
# Count the sentences of every file directly inside ./news and save the table.
f = []
words = []
p = './news'
# Only the top level of the folder is wanted; a missing folder yields nothing.
_dirpath, _dirnames, top_level_files = next(walk(p), (p, [], []))
f.extend(top_level_files)
for file in f:
    line = file + '\t' + str(sent(file))
    words.append(line)
move(words, 'counted.txt')
# Collect the entered words that end in "tur" until an empty line is typed,
# then print them one per line.
n=[]
while True:
    word = input ('word:')
    if word == '':
        break
    if word.endswith ('tur'):
        n.append(word)
# '\n' is the newline escape; the original '/n' printed a literal slash and n.
print ('\n'.join(n))
# Country -> capital table with two helpers.
d = {
    'Germany': 'Berlin',
    'France': 'Paris',
    'USA': 'Washington DC',
    'Russia': 'Moscow',
}
for country, city in d.items():
    print (country + "*" + city)


def capital(a):
    """Ask for a country and return its capital; print ``oops`` if it is unknown.

    The argument is overwritten by the user's answer.
    """
    a = input("Country:")
    if a not in d:
        print ("oops")
        return None
    return d[a]


def revert ():
    """Return the capital -> country mapping built from ``d``."""
    return {city: country for country, city in d.items()}


a = revert()
print(a)
# Print every line of fr.txt that contains the substring 'союз'.
with open ('fr.txt', 'r', encoding = 'utf-8') as a:
    text = a.readlines()
for line in filter(lambda candidate: 'союз' in candidate, text):
    print (line)
# Print the first half of the word and the remainder read backwards.
n = input("WORD: ")
m = len(n)//2
first_half = n[:m]
rest_reversed = n[:m-1:(-1)]
print (first_half, rest_reversed)
import os
import shutil
name=input ('напишите што-нибудь')
f_name=name.replace(' ', '\\')
os.makedirs(f_name)
n = input ('word:')
for i in range (len(n)):
print(n [-i-1::])
import re
def open_html(path='xenokeryx.html'):
    """Return the whole content of the UTF-8 file *path*.

    The original used a string literal as the parameter, which is a
    SyntaxError. The literal is now the default value, so calls with or
    without the file name both work.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
def find_links (content):
reg=r'(.*?)'
links=re.findall (reg, content)
return links
text=open_html ('xenokeryx.html')
links=find_links(text)
for link in links [:20]:
print (link[1], '-->', link[0])
a=open (input(), 'r', encoding='utf-8')
b=0
c=0
for line in a:
arr=line.split()
b=b+len(arr)
for d in arr:
if len(d)>10:
c=c+1
a.close()
e=c/b*100
print (e, '%')
import os
def sup():
    """Print the directory tree below the current directory.

    Each directory is printed as ``<tabs>--<path>`` and each of its files on
    a separate line one tab deeper; the number of tabs equals the number of
    path separators in the directory path.
    """
    for root, dirs, files in os.walk('.'):
        # os.sep instead of a hard-coded backslash: the depth was always 0
        # on systems that use '/'.
        num = root.count(os.sep)
        print('\t' * num, root, sep='--')
        for f in files:
            print('\t' * (num + 1), f)
sup()
def opentext(path='file.txt'):
    """Return the whitespace-separated words of the UTF-8 file *path*.

    Leading and trailing punctuation (``.,?!:;()``) is stripped from every
    word. The original declared the parameter as ``file.txt`` and passed the
    strip characters unquoted, both of which are syntax errors.
    """
    with open(path, 'r', encoding='utf-8') as a:
        text = a.read()
    return [form.strip('.,?!:;()') for form in text.split()]
def word():
    """Print the words of ``file.txt`` that end in "ous" and return their count.

    NOTE(review): the original tail (``b.split()`` on a list followed by
    ``str.count(c)``) could not run; returning the number of matches is the
    presumed intent. Confirm against the assignment.
    """
    # endswith() is also safe for words shorter than three characters, where
    # the original negative indexing raised IndexError.
    b = [form for form in opentext('file.txt') if form.endswith('ous')]
    print(b)
    return len(b)
import random
def _random_word(path):
    """Return a random whitespace-separated word from the UTF-8 file *path*."""
    with open(path, 'r', encoding='utf-8') as file:
        return random.choice(file.read().split())


def noun():
    """Return a random noun from ``Mnoun.txt``."""
    return _random_word('Mnoun.txt')


def verb():
    """Return a random verb from ``verb1.txt``."""
    return _random_word('verb1.txt')


def adj():
    """Return a random adjective from ``adj.txt``."""
    return _random_word('adj.txt')


def noun2():
    """Return a random word from ``noun2.txt``."""
    return _random_word('noun2.txt')


def conj():
    """Return a comma followed by a random conjunction."""
    conjs = ["и", "или", "но", "да", "однако", "зато", "когда", "пока", "потому что", "чтобы", "то есть"]
    return "," + random.choice(conjs)


def noun3():
    """Return a random word from ``noun3.txt``."""
    return _random_word('noun3.txt')


def verb2():
    """Return a random verb from ``2verb.txt``.

    Renamed from ``2verb``: an identifier cannot start with a digit.
    """
    return _random_word('2verb.txt')


def sen():
    """Build one random sentence from the word lists."""
    # The original concatenated the function objects themselves and left the
    # parenthesis open; each generator has to be called.
    return (noun() + " " + verb() + " " + adj() + " " + noun2() + " "
            + conj() + " " + noun3() + " " + verb2() + ".")


for i in range(5):
    print(sen())
import xml.etree.ElementTree as a
from os import walk
def sent(filename):
    """Return the number of ``<se>`` elements in ``./news/<filename>``.

    The original read the module-level loop variable ``file`` instead of its
    own argument, so it only worked when called from that loop.
    """
    tree = a.parse('./news/' + filename)
    return len(tree.getroot().findall('.//se'))
def move(res, filename):
    """Write each string of *res* to the file *filename*, one per line."""
    # The original never closed the handle, so buffered output could be lost.
    with open(filename, 'w') as res_file:
        for item in res:
            res_file.write(item + '\n')
f = []
words = []
p = './news';
for (dirpath, dirnames, filenames) in walk(p):
f.extend(filenames)
break
for file in f:
words.append(file+'\t'+str(sent(file)))
move(words,'counted.txt')
import xml.etree.ElementTree as a
from os import walk
import pandas as q
def second(filename):
    """Return ``"<author>:<topic>"`` read from ``./news/<filename>``.

    Both values are taken from the ``content`` attribute of the first element
    whose ``name`` attribute is ``author`` / ``topic``. If either element is
    missing, ``find`` returns None and the attribute access raises
    AttributeError.

    The original parsed the module-level ``file`` variable instead of its
    own argument.
    """
    root = a.parse('./news/' + filename).getroot()
    name = root.find(".//*[@name='author']")
    topic = root.find(".//*[@name='topic']")
    return name.attrib['content'] + ":" + topic.attrib['content']
f = []
d = []
p = './news';
for (dirpath, dirnames, filenames) in walk(p):
f.extend(filenames)
for file in f:
tmp = second(file).split(':')
tmp_arr = [file,tmp[0],tmp[1]]
d.append(tmp_arr)
df = q.DataFrame(d,columns=["название","автор","тема"])
df.to_csv("2.csv", sep=';', encoding='windows-1251')
import re
def main ():
with open('F.xml', 'r', encoding='utf-8') as f:
text=f.readlines()
return text
n=main ()
m=len(n)
print (m)
import re
dic = {}
with open('f.xml') as f:
for row in f:
if(re.match(r'.*',row)):
arr = row.split("\"")
key = arr[3]
if key in dic:
dic[key]=dic[key]+1
else:
dic[key] = 1;
for key in dic.items():
print(key+" "+"\r\n")
import os
import shutil
folder='.'
print (os.listdir('.'))
for f in os.listdir('.'):
with open (os.path.join(folder, f)) as text:
print('file: ', f)
a=str_word_count(f, ' ')
filelist = [f for f in os.listdir('.') if os.path.isfile(f)]
if a>1:
print(filelist)
def open_format(crab):
    """Split the UTF-8 text file *crab* into cleaned sentences.

    Sentence terminators (``...``, ``.``, ``?``) are unified to ``!``, and
    the text is split on ``!``. The piece after the last terminator is
    dropped. The characters ``< > * . « » , ' "`` are removed from every
    sentence, and surrounding whitespace is stripped.

    Args:
        crab: path of the file to read. The original accessed ``crab.txt``,
            i.e. an attribute of the argument, instead of opening the path.

    Returns:
        List of cleaned sentence strings.
    """
    with open(crab, 'r', encoding='utf-8') as f:
        text = f.read()
    text = re.sub(r'\.\.\.|[\.\?]', '!', text)
    pieces = text.split('!')[:-1]
    return [re.sub('[<>\\*\\.«»,\'"]', '', piece).strip() for piece in pieces]
def repeat():
work=open_format (crab.txt)
words=re.findall(r'([a-zA-Z]+(?:[?:[\'-][a-zA-Z]+)*)',s)
res=[]
for x in range (a,z):
res.append (x)
print (res)
import os
os.listdir('.')
file_tree=os.walk('.')
names = {}
for root, dirs, files in os.walk('.'):
for f in files:
name = f.split('.')[0]
if name not in names:
names[name]=1
print(len(names))
import random
def _random_word(path):
    """Return a random whitespace-separated word from the UTF-8 file *path*."""
    with open(path, 'r', encoding='utf-8') as file:
        return random.choice(file.read().split())


def noun():
    """Return a random noun from ``Mnoun.txt``."""
    return _random_word('Mnoun.txt')


def verb():
    """Return a random verb from ``verb1.txt``."""
    return _random_word('verb1.txt')


def adj():
    """Return a random adjective from ``adj.txt``."""
    return _random_word('adj.txt')


def noun2():
    """Return a random word from ``noun2.txt``."""
    return _random_word('noun2.txt')


def conj():
    """Return a comma followed by a random conjunction."""
    conjs = ["и", "или", "но", "да", "однако", "зато", "когда", "пока", "потому что", "чтобы", "то есть"]
    return "," + random.choice(conjs)


def noun3():
    """Return a random word from ``noun3.txt``."""
    return _random_word('noun3.txt')


def verb2():
    """Return a random verb from ``2verb.txt``.

    Renamed from ``2verb``: an identifier cannot start with a digit.
    """
    return _random_word('2verb.txt')


def sen():
    """Build one random sentence from the word lists."""
    # The original concatenated the function objects themselves and left the
    # parenthesis open; each generator has to be called.
    return (noun() + " " + verb() + " " + adj() + " " + noun2() + " "
            + conj() + " " + noun3() + " " + verb2() + ".")


for i in range(5):
    print(sen())
import re
def main():
with open ('lemon.html', 'r', encoding='utf-8') as f:
text=f.read()
a='+? '
if re.search (a, text):
card = re.search(a, text).group()
b='Семейство(?:.|\n)*?(.+?)'
if re.search(b, a):
с = re.search(b, a).group(1)
with open ('family.txt', 'a', encoding = 'utf-8') as f:
f.write(с)
else:
print('Family type not found.')
with open ('family.txt', 'a', encoding = 'utf-8') as f:
f.write('Family type not found.')
else:
print('Error!')
with open ('family.txt', 'a', encoding = 'utf-8') as f:
f.write('Error!')
import os
import re
def count_words():
with open('answer1.txt', 'w', encoding='utf-8') as fout:
for root, dirs, files in os.walk('./news'):
for f in files:
count = 0
with open(os.path.join(root, f), 'r') as fin:
f1 = fin.read().split()
for line in f1:
if '' in line:
count += 1
fout.write('%s \t %d \n' %(f, count))
def annot():
with open('answer2.csv', 'w', encoding='utf-8') as fout:
fout.write('Название файла \t Автор \t Дата создания')
for root, dirs, files in os.walk('./news'):
for f in files:
with open(os.path.join(root, f), 'r') as fin:
f2 = fin.read()
nam = f
reg1 = ''
reg2 = ''
auth = re.search(reg1, f2).group(1)
date = re.search(reg2, f2).group(1)
fout.write('%s \t %s \t %s \n' %(f, auth, date))
def bigramms():
with open('answer3.txt', 'w', encoding='utf-8') as fout:
for root, dirs, files in os.walk('./news'):
for f in files:
with open(os.path.join(root, f), 'r') as fin:
f3 = fin.read().split('\n')
reg = '(.+?)'
for indx, sentence in enumerate(f3):
if '' in sentence:
f3[indx] = [re.search(reg, sentence).group(1), re.search(reg, sentence).group(2)]
else:
f3.remove(sentence)
temp = True
for indx, word in enumerate(f3):
try:
if 'A' in word[0]:
if 'gen' in word[0]:
if 'S' in f3[indx + 1][0]:
if 'gen' in f3[indx + 1][0]:
fout.write('%s %s \n' %(word[1], f3[indx + 1][1]))
except IndexError:
temp = False
def main():
count_words()
annot()
bigramms()
if __name__ == '__main__':
main()
def done_text():
    """Return the lower-cased words of ``ostin.txt`` with punctuation stripped.

    Words are split on whitespace. Leading and trailing punctuation, digits
    and quote marks are removed, so a token made only of such characters
    becomes an empty string.
    """
    with open('ostin.txt', 'r', encoding='utf-8') as f:
        words = f.read().lower().split()
    # '\\[' spells the backslash explicitly; the original '\[' is an invalid
    # escape sequence that newer Python versions warn about.
    return [word.strip('.,:;№-*!?/|\\[]{}()\'"1234567890«»><') for word in words]
def count_words(arr):
    """Return a dict mapping every item of *arr* to its number of occurrences."""
    counts = {}
    for token in arr:
        counts[token] = counts.get(token, 0) + 1
    return counts
def count_letters(arr):
    """Count how many words of *arr* start with each lowercase Russian letter.

    Every letter of the alphabet is present in the result, with 0 when no
    word starts with it. Empty words and words starting with any other
    character are ignored.
    """
    tally = dict.fromkeys('абвгдеёжзийклмнопрстуфхцчшщъыьэюя', 0)
    initials = [item[0] for item in arr if item]
    for initial in initials:
        if initial in tally:
            tally[initial] += 1
    return tally
def count_pos(arr):
    """Map every item of *arr* to the index of its last occurrence."""
    positions = {}
    for index in range(len(arr)):
        positions[arr[index]] = index
    return positions
def create_antw(dic):
    """Write *dic* to ``answer_keys2.tsv`` as ``key<TAB>value`` lines sorted by key."""
    # The context manager closes the file even if formatting or writing fails.
    with open('answer_keys2.tsv', 'w', encoding='UTF-8') as f:
        for key in sorted(dic):
            f.write('{0}\t{1}\n'.format(key, str(dic[key])))
def main():
textik = done_text()
create_antw(count_pos(textik))
if __name__ == '__main__':
main()
my_num = 9
your_num = int(input('Write a number from 1 to 10, please: '))
if your_num == my_num:
print('You\'re lucky one :D')
else:
if your_num > my_num:
print('Your number is too big')
else:
print('Your number is too small')
your_num = int(input('Try again: '))
if your_num == my_num:
print('You\'re lucky one :D')
else:
print('You\'re hopeless')
import re
def split_txt():
    """Print ``test1.txt`` with a ``^`` marker after every sentence end.

    Newlines are replaced by spaces first. A sentence end is ``?``, ``!``,
    ``...``, or a full stop that follows two words of which the second has
    at least two letters.
    """
    with open('test1.txt', 'r', encoding='UTF-8') as f:
        s = f.read()
    # str.replace returns a new string; the original discarded the result,
    # so the newlines were never removed.
    s = s.replace('\n', ' ')
    s1 = re.sub(r'(\?|!|\.\.\.|([а-яa-z.]+ [а-яa-zА-ЯA-Z]{2,}\.))', '\\1^', s)
    print(s1)
def main():
split_txt()
main()
import re
def find_space(fname):
    """Print, comma-separated, every ``«Name-<digit>`` fragment found in *fname*.

    A fragment is an opening guillemet, a run of Latin or Cyrillic letters,
    a hyphen and one digit.
    """
    with open(fname, 'r', encoding='UTF-8') as f:
        # re.findall needs a string; the original split the text into a list
        # first, which raised TypeError.
        s = f.read()
    regex = '«[a-zA-ZА-Яа-я]+?-[0-9]'
    wlist = re.findall(regex, s)
    print(', '.join(wlist))
def main():
find_space('test.txt')
main()
coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
while True:
s = input('Введите текст: ')
if s == '':
break
s = s.split()
res = ''
if s[0] == 'decode':
s.pop(0)
s = ' '.join(s)
for letter in s:
if letter == '!':
res += ' '
else:
for indx, i in enumerate(coinc):
if i == letter:
if i == 'A':
res += 'Z'
elif i == 'a':
res += 'z'
elif i == 'А':
res += 'Я'
elif i == 'а':
res += 'я'
else:
res += coinc[indx - 1]
else:
if s[0] == 'code':
s.pop(0)
s = ' '.join(s)
for letter in s:
if letter == ' ':
res += '!'
else:
for indx, i in enumerate(coinc):
if i == letter:
if i == 'Z':
res += 'A'
elif i == 'z':
res += 'a'
elif i == 'Я':
res += 'А'
elif i == 'я':
res += 'а'
else:
res += coinc[indx + 1]
print (res)
print('Программа завершила свою работу!')
words=[]
check = True
while check is True:
inp = input("Введите слово: ")
if inp == "":
check = False
else:
words.append(inp)
for indx in range(len(words) - 1, -1, -1):
print(words[indx])
check = True
while check is True:
s = input("Введите текст: ")
if s == "":
check = False
else:
res = ""
for letter in s:
if ord(letter) == 90:
res += chr(65)
elif ord(letter) == 122:
res += chr(97)
elif ord(letter) == 1071:
res += chr(1040)
elif ord(letter) == 1103:
res += chr(1072)
else:
res += chr(ord(letter) + 1)
print (res)
print("Программа завершила работу")
names = ['Оля','Маша','Коля','Костя','Нина','Ира']
surnames=['Кузнецова', 'Сидорова', 'Семенов', 'Иванов', 'Илларионова']
if len(names) >= len(surnames):
for i in range(len(surnames)):
strng = names[i] + ' ' + surnames[i]
print(strng)
check = len(surnames)
while check < len(names):
print(names[check])
check += 1
else:
for i in range(len(names)):
strng = names[i] + ' ' + surnames[i]
print(strng)
check = len(names)
while check < len(surnames):
print(surnames[check])
check += 1
def doneText(fname):
    """Return the space-separated words of *fname*, lower-cased and stripped.

    The text is split on single spaces only, and surrounding punctuation is
    removed from every piece.
    """
    # The original wrote ``f.close`` without parentheses, so the file was
    # never closed.
    with open(fname, 'r') as f:
        pieces = f.read().split(' ')
    return [piece.lower().strip('.,:;№-*!?/|\\[]{}()\'"') for piece in pieces]
fnm = input('Введите имя файла: ')
arr = doneText(fnm)
print('Количество слов в тексте = {0}'.format(arr.len()))
my_num = 9
check = False
while (check == False):
try:
your_num = int(input('Write a number from 1 to 10, please: '))
except ValueError:
print("It's not a number, I'm out")
break
if your_num == my_num:
print('You\'re lucky one :D')
check = True
else:
if your_num > my_num:
print('Your number is too big')
else:
print('Your number is too small')
print("End of programme")
check = True
while check == True:
word = input('Write a word in cyrillic: ')
if word == "":
check = False
print("Empty word, I'm out")
else:
if word.endswith('о') or word.endswith('н') or word.endswith('р'):
print('Possible forms: Nom. Sg. / Acc. Sg.')
elif word.endswith('а'):
print('Possible forms: Gen. Sg. / Nom. Pl. / Acc. Pl.')
elif word.endswith('у'):
print('Possible forms: Dat. Sg.')
elif word.endswith('ом'):
print('Possible forms: Instrum. Sg. / Nom. Sg.')
elif word.endswith('е'):
print('Possible forms: Prep. Sg.')
elif word.endswith('ам'):
print('Possible forms: Dat. Pl.')
elif word.endswith('ами'):
print('Possible forms: Instrum. Pl.')
elif word.endswith('ах'):
print('Possible forms: Prep. Pl.')
elif word.endswith('и'):
print('Possible forms: Nom. Pl.')
else:
print('Possible forms: Gen. Pl.')
print('Thanks for using!')
import os
import re
def rem_dir(name_dir):
for root, dirs, files in os.walk('.', topdown=False):
if re.findall(os.sep + name_dir, root):
for f in files:
os.remove(os.path.join(root, f))
for d in dirs:
os.rmdir(os.path.join(root, d))
for d in dirs:
if name_dir == d:
os.rmdir(os.path.join(root, d))
def print_root():
s = '--'
for root, dirs, files in os.walk('.'):
print (s + root)
if len(dirs):
s = '\t' + s
for f in files:
print('\t{0}'.format(f))
def main():
rem_dir('wrong')
print_root()
if __name__ == '__main__':
main()
word = input('Write a word in cyrillic: ')
if word.endswith('о'):
print('Possible forms: Nom. Sg. / Acc. Sg.')
elif word.endswith('а'):
print('Possible forms: Gen. Sg. / Nom. Pl. / Acc. Pl.')
elif word.endswith('у'):
print('Possible forms: Dat. Sg.')
elif word.endswith('ом'):
print('Possible forms: Instrum. Sg.')
elif word.endswith('е'):
print('Possible forms: Prep. Sg.')
elif word.endswith('ам'):
print('Possible forms: Dat. Pl.')
elif word.endswith('ами'):
print('Possible forms: Instrum. Pl.')
elif word.endswith('ах'):
print('Possible forms: Prep. Pl.')
else:
print('Possible forms: Gen. Pl.')
import re
def done_text(fname):
    """Return the words of the UTF-8 file *fname*, lower-cased and stripped.

    Words are split on whitespace. Surrounding punctuation, digits and quote
    marks are removed, so a token made only of such characters becomes an
    empty string.
    """
    # The original wrote ``f.close`` without parentheses, so the file was
    # never closed.
    with open(fname, 'r', encoding='UTF-8') as f:
        tokens = f.read().split()
    return [token.lower().strip('.,:;№-*!?/|\\[]{}()\'"1234567890«»”“><') for token in tokens]
def print_words(s):
    """Print every word of *s* that contains at least three of the vowels ауоиыеюя."""
    three_vowels = re.compile('.*[ауоиыеюя].*[ауоиыеюя].*[ауоиыеюя].*')
    matching = [item for item in s if three_vowels.search(item)]
    for item in matching:
        print(item)
def main():
textik = done_text(input('Введите имя файла с расширением: '))
print_words(textik)
main()
import os
import re
import math
from math import log
PUNCT = '[.,!«»?&@"$\[\]\(\):;%
def preprocessing(text):
text_wo_punct = re.sub(PUNCT, '', text.lower())
words = text_wo_punct.strip().split()
return words
def count_tf(word, text):
return text.count(word) / len(text)
def count_df(word, texts):
n = [1 for text in texts if word in text]
return sum(n)
def count_idf(word, texts):
n = len(texts) / (1 + count_df(word, texts))
return n
def count_tfidf(word, text, texts):
tf = count_tf(word, text)
idf = count_idf(word, texts)
return log(tf, 10) * log(idf, 10)
def get_texts():
texts_dic = {}
for root, dirs, files in os.walk('wikipedia'):
for f in files[:50]:
with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
text = preprocessing(t.read())
texts_dic[f.split('.')[0]] = text
texts = list(texts_dic.values())
return texts_dic, texts
def fin_output(texts_dic, texts):
for text in texts_dic:
print("Top words in document {}".format(text))
scores = {}
for word in texts_dic[text]:
scores[word] = count_tfidf(word, texts_dic[text], texts)
sorted_words = sorted(scores.items(), key=lambda x: x[1])
for word, score in sorted_words[:5]:
print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
def main():
a = get_texts()
fin_output(a[0], a[1])
if __name__ == '__main__':
main()
check = True
coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
while check is True:
s = input('Введите текст: ')
if s == '':
check = False
else:
res = ''
for letter in s:
if letter == ' ':
res += ' '
else:
for indx, i in enumerate(coinc):
if i == letter:
if i == 'A':
res += 'Z'
elif i == 'a':
res += 'z'
elif i == 'А':
res += 'Я'
elif i == 'а':
res += 'я'
else:
res += coinc[indx - 1]
print (res)
print('Программа завершила свою работу!')
def done_text(fname):
    """Return the words of *fname*, lower-cased and stripped of punctuation.

    The file is read with the platform default encoding and split on
    whitespace. Surrounding punctuation, digits and quote marks are removed
    from every token.
    """
    # The original wrote ``f.close`` without parentheses, so the file was
    # never closed.
    with open(fname, 'r') as f:
        tokens = f.read().split()
    return [token.lower().strip('.,:;№-*!?/|\\[]{}()\'"1234567890«»”“><') for token in tokens]
def freq_dic(arr):
    """Build a frequency dictionary: item of *arr* -> number of occurrences."""
    frequencies = {}
    for token in arr:
        frequencies[token] = frequencies.get(token, 0) + 1
    return frequencies
def print_dic(dic):
    """Print ``word count`` for every entry of *dic* whose count is at least 10."""
    for entry, count in dic.items():
        if count < 10:
            continue
        print(entry, count)
def main():
my_text = done_text(input('Введите имя файла с расшриением: '))
print_dic(freq_dic(my_text))
main()
def done_text(fname):
    """Return the words of *fname*, lower-cased and stripped of punctuation.

    The file is read with the platform default encoding and split on
    whitespace. Surrounding punctuation, digits and guillemets are removed
    from every token.
    """
    # The original wrote ``f.close`` without parentheses, so the file was
    # never closed.
    with open(fname, 'r') as f:
        tokens = f.read().split()
    return [token.lower().strip('.,:;№-*!?/|\\[]{}()\'"1234567890«»><') for token in tokens]
def count_syll(arr, n):
    """Return the words of *arr* that contain exactly *n* of the vowels аоуыиеёюя."""
    vowels = 'аоуыиеёюя'

    def vowel_total(item):
        # Number of characters of *item* that are listed vowels.
        return sum(1 for char in item if char in vowels)

    return [item for item in arr if vowel_total(item) == n]
def first_letter(arr, letter):
    """Return the words of *arr* that start with the string *letter*."""
    return [item for item in arr if item.startswith(letter)]
def choice():
fnm = input('Введите имя файла: ')
textik = done_text(fnm)
make_choice = input('Если хотите, чтобы программа считала слоги, введите syllables; иначе - letter: ')
if make_choice == 'syllables':
numb = int(input('Введите количество слогов в словах: '))
print(' '.join(count_syll(textik, numb)))
else:
lett = input('Введите желаемую первую букву: ')
print(' '.join(first_letter(textik, lett)))
def main():
choice()
main()
import os
def mk_ppk(s):
s = s.split()
pth = '.'
for word in s:
pth += os.sep + word
if not os.path.exists(pth):
os.makedirs(pth)
def mk_fls(num):
pth = '.'
for ppk in range(num):
pth += os.sep + str(ppk+1)
if not os.path.exists(pth):
os.makedirs(pth)
for pp_quant in range(ppk+1):
f = open(pth + os.sep + str(pp_quant + 1) + '.txt', 'w')
f.close()
pth = '.'
def prnt_dir(nm_dir):
    """Print the name of every sub-directory located directly in *nm_dir*."""
    for fl in os.listdir(nm_dir):
        # os.listdir returns bare names. They must be joined with nm_dir,
        # otherwise isdir() checks them against the current directory.
        if os.path.isdir(os.path.join(nm_dir, fl)):
            print(fl)
def main():
mk_ppk(input('Введите приложение: '))
mk_fls(int(input('Введите число: ')))
prnt_dir('.')
if __name__ == '__main__':
main()
my_num = 9
check = False
your_num = int(input('Write a number from 1 to 10, please: '))
while(your_num != my_num):
if your_num > my_num:
print('Your number is too big')
else:
print('Your number is too small')
try:
your_num = int(input('Try again: '))
except ValueError:
print("Not a number")
check = True
break
if check == True:
print("See you next time")
else:
print("You're right")
check = True
coinc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
while check is True:
s = input('Введите текст: ')
if s == '':
check = False
else:
res = ''
for letter in s:
if letter == ' ':
res += ' '
else:
for indx, i in enumerate(coinc):
if i == letter:
if i == 'Z':
res += 'A'
elif i == 'z':
res += 'a'
elif i == 'Я':
res += 'А'
elif i == 'я':
res += 'а'
else:
res += coinc[indx + 1]
print (res)
print('Программа завершила свою работу!')
f = open('freq_crlf.txt', 'r', encoding='utf-8')
s = f.read().split('\n')
f.close()
for line in s:
line = line.split(' | ')
if line[1] == 'союз':
print(' | '.join(line))
f = open('freq_crlf.txt', 'r', encoding='utf-8')
s = f.read().split('\n')
f.close()
arr = []
while True:
word = input('Введите слово: ')
if word == '':
print('Результаты:')
break
else:
arr.append(word)
for word in arr:
check = False
for line in s:
line = line.split(' | ')
if word == line[0]:
print(' | '.join(line))
check = True
if check is False:
print(u'{0}: Такого слова в словаре нет.'.format(word))
print('Завершение работы программы')f = open('freq_crlf.txt', 'r', encoding='utf-8')
s = f.read().split('\n')
f.close()
while True:
word = input('Введите слово: ')
if word == '':
print('Завершение работы программы')
break
else:
check = False
for line in s:
line = line.split(' | ')
if word == line[0]:
print(' | '.join(line))
check = True
if check is False:
print('Такого слова в словаре нет.')
import decimal
f = open('freq_crlf.txt', 'r', encoding='utf-8')
s = f.read().split('\n')
f.close()
ress = ''
ipm_sum = 0
for line in s:
line = line.split(' | ')
if line[1].find('ед жен') != -1:
ress += line[0]
ress += ', '
ipm_sum += decimal.Decimal(line[2])
print(ress)
print(u'Суммарное значение ipm = {0}'.format(ipm_sum))import re
def open_file():
    """Return the whole content of ``islandic.xml`` (UTF-8) as one string."""
    # The context manager closes the file even if reading fails.
    with open('islandic.xml', 'r', encoding='UTF-8') as f:
        return f.read()
def count_lines():
s = open_file()
s = s.split('\n')
f = open('answer_length.txt', 'w', encoding='UTF-8')
f.write(str(len(s)))
f.close()
def my_diction(arr):
    """Return a dict mapping every item of *arr* to its number of occurrences."""
    tally = {}
    for entry in arr:
        tally[entry] = tally.get(entry, 0) + 1
    return tally
def create_diction():
s = open_file()
regex = ''
arr = re.findall(regex, s)
dix = my_diction(arr)
f = open('answer_keys.txt', 'w', encoding='UTF-8')
f.write('Отсортированный список морфологических разборов:\n')
for key in sorted(dix):
f.write(key + '\n')
f.close()
def count_adj():
s = open_file()
regex = ''
arr = re.findall(regex, s)
dix = my_diction(arr)
f = open('answer_adj.txt', 'w', encoding='UTF-8')
for key in sorted(dix):
f.write(key + ' ' + str(dix[key]) + '\n')
f.close()
def create_csv():
s = open_file()
print(s)
regex1 = '(.+?)'
regex2 = '<.+?>\n'
s = re.sub(regex1, '\\1, \\2, \\3', s)
s = re.sub(regex2, '', s)
s = re.sub('( )+?', '', s)
s = s.split('\n')
f = open('answer_dict.csv', 'w', encoding='UTF-8')
for line in s:
f.write(line + '\n')
f.close()
def main():
count_lines()
create_diction()
count_adj()
create_csv()
if __name__ == '__main__':
main()
check = True
words = []
while check is True:
s = input("Введите слово: ")
if s == "":
check = False
else:
temp = []
for letter in s:
temp.append(letter)
words.append(temp)
for wrd in words:
for letterindx in range(2, len(wrd), 2):
if letterindx >= len(wrd):
break
wrd.pop(letterindx)
s = ""
for letterindx in range(len(wrd) - 1, -1, -1):
s += wrd[letterindx]
print(s)f = open('input.txt', 'r', encoding='UTF-8')
s = f.read().split('\n')
f.close()
avgsum = 0
for indx, line in enumerate(s):
s[indx] = line.split()
avgsum += len(s[indx])
print(u'Среднее количество слов в строке = {0}'.format(avgsum / len(s)))import re
word = input("Введите слово на кириллице: ")
pattern1 = r'[А-Яа-я]'
pattern2 = r'[1-9A-Za-z]'
if re.match(pattern1, word) and re.search(pattern2, word) is None:
for indx, letter in enumerate(word):
if indx % 2 != 0:
if letter != "а" and letter != "к":
print(letter)
else:
print("Вводить можно только кириллицу :Р")temp = False
while (temp == False):
try:
a = float(input('Введите первое число (a) '))
b = float(input('Введите второе число (b) '))
c = float(input('Введите третье число (c) '))
temp = True
except (TypeError, ValueError):
print('Просила же только числа вводить!')
if a + b == c:
print('Поздравляю! a + b = c')
else:
print('Прошу прощения, но a + b != c')
if a*c + b == 0:
print('Поздравляю! a*c + b = 0')
else:
print('Прошу прощения, но a*c + b != 0')import random
def ask_name():
return input('Введите имя файла с расширением: ')
def get_words():
    """Read the game dictionary from the file named by the user.

    Every line has the form ``word,hint1,hint2,...``.

    Returns:
        dict mapping each word to the list of its hints.
    """
    with open(ask_name(), 'r', encoding='UTF-8') as f:
        lines = f.read().split('\n')
    dic = {}
    for ln in lines:
        temp = ln.split(',')
        # Blank lines (e.g. after the final newline) and lines without a hint
        # would create an entry with an empty hint list, and random.choice()
        # raises IndexError on an empty list.
        if len(temp) < 2:
            continue
        dic[temp[0]] = temp[1:]
    return dic
def guess_word(word):
num = len(word)
if num <= 4:
print('У вас {0} попытки'.format(num))
else:
print('У вас {0} попыток'.format(num))
while num > 0:
temp = input('Введите слово: ')
if temp == word:
print('Вы угадали!')
break
else:
print('Попробуйте еще раз!')
num -= 1
if num == 0:
print('Повезет в другой раз!')
def game(d):
num_check = 0
for k in d:
print('Подсказка! {0} ...'.format(random.choice(d[k])))
guess_word(k)
num_check += 1
if num_check == len(d):
print('Это было последнее слово. Приходите еще')
break
ask = input('Хотите попробовать еще раз? Введите только "да" или "нет": ')
if ask == 'нет':
break
def main():
d = get_words()
game(d)
main()def done_text(fname):
f = open(fname, 'r')
s = f.read().split()
for indx, word in enumerate(s):
s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><')
f.close
return s
def get_fname():
return input("Введите имя файла с расширением: ")
def count_ing(arr):
    """Return how many words of *arr* end in ``ing``."""
    return sum(1 for item in arr if item.endswith('ing'))
def count_form(arr, form):
    """Return how many items of *arr* are equal to *form*."""
    return sum(1 for item in arr if item == form)
def main():
textik = done_text(get_fname())
print('Всего в тексте {0} форм на -ing'.format(count_ing(textik)))
form = input('Введите форму, количество вхождений которой хотите найти: ')
print('Эта форма встречается {0} раз'.format(count_form(textik, form)))
main()import re
def search_inf(fname):
    """Find the capital named after ``>Столица`` in *fname*, then print and save it.

    The first Cyrillic word (optionally hyphenated) that follows
    ``>Столица`` is printed and written to ``answer.txt``. If nothing
    matches, nothing is printed and no file is created.
    """
    with open(fname, 'r', encoding='UTF-8') as f:
        s = f.read()
    regex = '>Столица.*?([А-Яа-я]+(-[А-Яа-я]+)*)'
    res = re.search(regex, s, re.DOTALL)
    if res:
        found = res.group(1)
        print(found)
        # The context manager closes the answer file even if the write fails.
        with open('answer.txt', 'w', encoding='UTF-8') as k:
            k.write(found)
def main():
search_inf(input('Введите имя файла: '))
if __name__ == '__main__':
main()import re
def change_text():
    """Copy ``mosq1.txt`` to ``antwort.txt``, replacing forms of "комар" with "слон".

    Only the stem is replaced and the ending (or following space or closing
    guillemet) is kept. Capitalised and lower-case forms are handled
    separately so the case of the first letter is preserved.
    """
    with open('mosq1.txt', 'r', encoding='UTF-8') as f:
        s = f.read()
    s1 = re.sub('Комар(»| |а|ы|у|ом|е|ов|ам|ами|ах)', 'Слон\\1', s)
    s1 = re.sub('комар(»| |а|ы|у|ом|е|ов|ам|ами|ах)', 'слон\\1', s1)
    # The context managers close both files even if reading or writing fails.
    with open('antwort.txt', 'w', encoding='UTF-8') as f:
        f.write(s1)
def main():
change_text()
if __name__ == '__main__':
main()import random
def generate_adj():
f = open('adj.txt', 'r')
s = f.read().split()
f.close()
return random.choice(s)
def generate_noun(num):
if num == 'sg':
f_name = 'noun_sg.txt'
else:
f_name = 'noun_pl.txt'
f = open(f_name, 'r')
s = f.read().split()
f.close()
return random.choice(s)
def generate_verb():
f = open('verbs.txt', 'r')
s = f.read().split()
f.close()
return random.choice(s)
def generate_punct(pos):
if pos == 'end':
f_name = 'end_punct.txt'
else:
f_name = 'mid_punct.txt'
f = open(f_name, 'r')
s = f.read().split()
f.close()
punct = random.choice(s)
if punct == '-':
punct = ' ' + punct
return punct
def generate_pronoun():
f = open('pronouns.txt', 'r')
s = f.read().split()
f.close()
return random.choice(s)
def generate_intj():
f = open('intj.txt', 'r')
s = f.read().split('\n')
f.close()
return random.choice(s)
def generate_line(num):
if num == 1:
return generate_adj() + ' ' + generate_noun('sg') + generate_punct('end') + '\n'
elif num == 2:
return generate_verb() + ' ' + generate_noun('pl') + ' и' + '\n'
else:
return generate_pronoun() + generate_punct('mid') + ' ' + generate_intj() + generate_punct('end') + '\n'
def generate_haiku():
return generate_line(1) + generate_line(2) + generate_line(3)
print(generate_haiku())import re
def done_text(fname):
    """Split the UTF-8 file *fname* into sentences, each a list of lower-case words.

    Punctuation, digits, quote marks and the Roman-numeral letters V, I, X
    are replaced by spaces. The text is then lower-cased, split into
    sentences on ``.``, ``?`` and ``!``, and every sentence is split into
    words. Sentences without words are dropped.

    Returns:
        List of sentences; each sentence is a non-empty list of words.
    """
    with open(fname, 'r', encoding='UTF-8') as f:
        s = f.read()
    # A character class replaces the original alternation, whose ``(|)``
    # branch matched the empty string at every position and so put a space
    # between all characters. Roman numerals are removed before lower(),
    # because afterwards the upper-case letters can no longer match.
    s = re.sub(r'''[,:№\-*/|\[\]{}\\()'"0-9«»><VIX]+''', ' ', s).lower()
    sentences = re.split(r'\.|\?|!|\.\.\.', s)
    # A new list is built instead of popping during enumeration, which
    # skipped the element after every removed one.
    return [words for words in (sent.split() for sent in sentences) if words]
def count_letters(arr):
    """Write every word of at least 7 letters to ``answer_sheet12.txt``.

    Args:
        arr: list of sentences, each a list of words.

    Each output line holds the 1-based sentence number, the word and its
    length.
    """
    mlist = [
        (indx + 1, word, len(word))
        for indx, senten in enumerate(arr)
        for word in senten
        if len(word) >= 7
    ]
    # The context manager closes the file even if a write fails.
    with open('answer_sheet12.txt', 'w', encoding='UTF-8') as f:
        for number, word, length in mlist:
            f.write('предложение {0}, {1}-------{2}\n'.format(number, word, length))
def main():
count_letters(done_text('tolstoy.txt'))
if __name__ == '__main__':
main()import os
import re
def count_dirs():
    """Return the names of directories in the current directory that contain a digit."""
    digit = '[0-9]'
    found = []
    for entry in os.listdir('.'):
        if not os.path.isdir(entry):
            continue
        if re.findall(digit, entry):
            found.append(entry)
    return found
def print_answer(arr):
fout = open('answer_sheet13.txt', 'w', encoding='UTF-8')
fout.write('Всего папок с цифрами в названии - {0}.'.format(str(len(arr))))
fout.write('Все имена в директории (без повторений):\n')
clear_names = []
for thing in os.listdir('.'):
temp = thing
if os.path.isfile(thing):
temp = re.sub('\..+', '', thing)
if temp not in clear_names:
clear_names.append(temp)
for nme in clear_names:
if nme:
fout.write(nme + '\n')
fout.close()
def main():
print_answer(count_dirs())
if __name__ == '__main__':
main()import os
def count_dep():
count = 0
for root, dirs, files in os.walk('.', topdown=False):
if len(root.split(os.sep)) - 1 > count:
count = len(root.split(os.sep)) - 1
with open('answer_sheet14.txt', 'w', encoding='UTF-8') as answer:
answer.write(str(count))
def main():
count_dep()
if __name__ == '__main__':
main()while True:
s = input("Введите строку: ")
if s == "":
break
for indx, part in enumerate(s):
print(s[:len(s) - indx])s = input("Введите строку: ")
for indx, part in enumerate(s):
print(s[:len(s) - indx])import re
def ask_name():
return input('Введите имя файла с расширением: ')
def get_words():
f = open(ask_name(), 'r', encoding='UTF-8')
s = f.read().split()
for indx, word in enumerate(s):
s[indx] = word.lower().strip('.,:;№-*!?/|\[]{}()\'"1234567890«»><')
f.close
return s
def count_words(words):
    """Return the distinct forms of the verb "открыть" found in *words*.

    A word qualifies when it matches the verb pattern in full. The result
    keeps the order of first appearance and has no duplicates.
    """
    pattern = re.compile('откр(ы|о)((т(ый|ая|ое|ые|ого|ой|ых|ому|ым|ую|ом|ою)|в(ш(ий?|ая|ее|ие|его|ей|их|ему|им|ую|ею))?)|(л(а|о|и)?)|(й(те)?)|(ют?|е(шь|м|те?)))(ся|сь)?')
    unique = []
    for candidate in words:
        if not pattern.fullmatch(candidate):
            continue
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
def main():
print(', '.join(count_words(get_words())))
main()word = input('Введите слово: ')
if word == '':
print ('Слово не введено')
word2 = ''
for i in range(len(word)):
for k in range(len(word)):
if k + i < len(word):
word2 += word[k + i]
else:
word2 += word[k + i - len(word)]
print (word2)
word2 = ''
import re
def open_and_edit():
f = open("verbs.txt", 'r', encoding = "utf-8")
s = f.read()
f.close()
s1 = s.lower()
a = s1.split()
for i, word in enumerate(a):
a[i] = word.strip('.,!?();:*/\|<>-_%&
return a
def find_and_print(a):
    """Print each distinct word of *a* that starts with a form of "программировать".

    Words are printed once, in order of first appearance.
    """
    verb_form = re.compile('^программир((у(ю(т|щ(и(й|ми?|е|х)|е(го|му?|й)|ая|ую))?|я|е(шь|те?))|ова(л(а|и)?|ть))(с(я|ь))?|уем(ы(й|ми?|е|х)?|о(го|му?|й)|ая?|ую))')
    seen = []
    for candidate in a:
        if verb_form.search(candidate) and candidate not in seen:
            seen.append(candidate)
    for item in seen:
        print(item)
def main():
text = open_and_edit()
find_and_print(text)
main()
import re
import os
def task1():
for root, dirs, files in os.walk('.\\news'):
s = ''
for f in files:
file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251")
text = file.readlines()
words = 0
for line in text:
reg = ''
r = re.search(reg,line)
if r:
words += 1
s += f + '\t' + str(words) + '\n'
f2 = open("words_in_files.txt", 'w', encoding = "utf-8")
f2.write(s)
def task2():
    """Write file name, author and creation date of every file under ``./news``.

    Each file is read as WINDOWS-1251 and scanned line by line for the
    ``author`` and ``created`` meta attributes; the last match of each wins
    and a missing value is left empty. The result is written to
    ``words_in_files.csv`` (UTF-8) as tab-separated lines.
    """
    reg_author = re.compile('content="([ |(а-яА-яa-zA-Z)]+)" name="author"')
    reg_date = re.compile(r'content="([0-9]+\.[0-9]+\.[0-9]+)" name="created"')
    rows = []
    # os.path.join instead of the Windows-only '.\\news' literal, which names
    # a non-existent entry on other systems. Rows are collected across all
    # walked directories rather than being reset for each one.
    for root, dirs, files in os.walk(os.path.join('.', 'news')):
        for f in files:
            author = ''
            date = ''
            # The context manager closes every input file; the original
            # leaked one handle per file.
            with open(os.path.join(root, f), 'r', encoding="WINDOWS-1251") as file:
                for line in file:
                    r1 = reg_author.search(line)
                    if r1:
                        author = r1.group(1)
                    r2 = reg_date.search(line)
                    if r2:
                        date = r2.group(1)
            rows.append(f + '\t' + author + '\t' + date + '\n')
    with open("words_in_files.csv", 'w', encoding="utf-8") as f3:
        f3.write(''.join(rows))
def task3():
for root, dirs, files in os.walk('.\\news'):
s = ''
for f in files:
file = open(os.path.join(root, f), 'r', encoding = "WINDOWS-1251")
text = file.readlines()
for i, line in enumerate(text):
reg_adj = 'A=.+gen.+>?'
reg_sumj = 'S,.+gen.+>?'
reg_word = '([а-яА-Я]+|`)\n'
r1 = re.search(reg_adj, line)
if r1:
r2 = re.search(reg_word, line)
word1 = r2.group(1)
r3 = re.search(reg_subj, text[i+1])
if r3:
word2 = r3.group(1)
def main():
task1()
task2()
main()
import re
def open_and_edit():
f = open("linguistics.txt", 'r', encoding = "utf-8")
s = f.read()
f.close()
return s
def replace_and_output(s):
    """Replace forms of "язык" in *s* with "шашлык" and save to ``shashlyk.txt``.

    A form is the stem plus up to three lower-case Cyrillic letters, followed
    by a space, full stop, comma or closing parenthesis. The ending and the
    case of the first letter are preserved.
    """
    s1 = re.sub('язык([а-я]{,3}( |\\.|,|\\)))', 'шашлык\\1', s)
    s2 = re.sub('Язык([а-я]{,3}( |\\.|,|\\)))', 'Шашлык\\1', s1)
    # The context manager closes the file even if the write fails.
    with open("shashlyk.txt", 'w', encoding="utf-8") as f:
        f.write(s2)
        print('Текст записан в файл shashlyk.txt')
def main():
text = open_and_edit()
replace_and_output(text)
main()
import os
import re
def count_folders():
result = 0
for f in os.listdir('.'):
if os.path.isdir(f):
if re.search('^([а-яА-Я]| )+$',f):
result += 1
print('Найдено папок:',result)
def print_names():
    """Print the distinct folder names and extension-less file names found in
    the current directory, sorted.

    Only files whose name ends in ``.<lowercase latin letters>`` are listed;
    several files sharing a stem (or a folder with the same name) are printed
    once.
    """
    # Raw string keeps ``\.`` a regex escape rather than an invalid str escape.
    file_name = re.compile(r'^(.+)(\.[a-z]+)$')
    names = set()
    for entry in os.listdir('.'):
        if os.path.isdir(entry):
            names.add(entry)
        elif os.path.isfile(entry):
            found = file_name.search(entry)
            if found:
                names.add(found.group(1))
    for name in sorted(names):
        print(name)
def main():
    """Report the Cyrillic-named folder count, then list the names."""
    count_folders()
    print_names()


main()
def read_file():
    """Return the lines of words.csv (UTF-8) as a list."""
    with open("words.csv", 'r', encoding="utf-8") as source:
        return source.readlines()
def make_dict(a):
    """Build a ``{second field: first field}`` mapping from ';'-separated lines.

    Fields are stripped of surrounding whitespace.  Lines with fewer than two
    fields (for example a blank trailing line) are skipped instead of raising
    IndexError.
    """
    words = {}
    for line in a:
        fields = [field.strip() for field in line.split(';')]
        if len(fields) < 2:
            continue
        words[fields[1]] = fields[0]
    return words
def guess(dic):
    """Play the guessing game for every entry of *dic*.

    For each key the stored value is shown as a hint and the player gets as
    many attempts as the hint has characters.
    """
    for noun, hint in dic.items():
        print(hint, '...')
        total = len(hint)
        for used in range(total):
            print('Осталось попыток: ', total - used)
            if input() == noun:
                print('Маладэц!')
                break
        else:
            # Reached only when every attempt failed; an empty hint gives
            # zero attempts and prints nothing.
            if total:
                print('Не угадал :(')
def main():
    """Load the word list, show the mapping and start the game."""
    words = make_dict(read_file())
    print(words)
    guess(words)


main()
import re
def open_and_edit():
    """Return the full contents of hse.html (UTF-8) as one string."""
    with open("hse.html", 'r', encoding="utf-8") as source:
        return source.read()
# Find the number that follows the 'Преподаватели' heading in page text s,
# print it and write it to found_number.txt.
def find_and_print(s):
# reg1: heading line, one more line, then digits (optionally split by a space).
reg1 = 'Преподаватели.*?\n.*?\n[0-9]+ ?[0-9]+'
# reg2: same prefix ending in a space; it is removed from the reg1 match below.
# NOTE(review): the whitespace inside these patterns may have been collapsed in
# this copy of the file - confirm against the original before relying on them.
reg2 = 'Преподаватели.*?\n.*?\n '
res1 = re.findall(reg1,s)
res2 = re.findall(reg2,s)
# res1[0] / res2[0] raise IndexError when either pattern finds nothing.
number = res1[0].replace(res2[0], '')
print('Число преподавателей:',number)
f = open("found_number.txt", 'w', encoding = "utf-8")
f.write(number)
f.close()
def main():
    """Read the page and report the number found in it."""
    find_and_print(open_and_edit())


main()
# Read words until an empty line is entered, then print the k-th word
# (counting from 1) without its first k characters.
words = list(iter(lambda: input('Введите слово: '), ''))
for position, string in enumerate(words, start=1):
    if position >= len(string):
        print('В этом слове не осталось символов')
    else:
        print(string[position:])
# Read three integers and report (1) whether a leaves remainder c when
# divided by b and (2) whether c solves a*x + b = 0.
a = int(input('Введите число a: '))
b = int(input('Введите число b: '))
c = int(input('Введите число c: '))
# b == 0 has no remainder at all; guard it so ``a % b`` cannot raise
# ZeroDivisionError, and report the negative answer.
if b != 0 and a % b == c:
    print('a дает остаток c при делении на b')
else:
    print('a не дает остаток c при делении на b')
if a * c + b == 0:
    print('c является решением линейного уравнения ax + b = 0')
else:
    print('c не является решением линейного уравнения ax + b = 0')
import random
def open_file():
    """Return the lines of words.txt (UTF-8) as a list."""
    with open("words.txt", 'r', encoding="utf-8") as source:
        return source.readlines()
# Look through the lines of text for one whose first token equals word and
# collect the remaining tokens of that line.
def find_words(word,text):
for i in range(len(text)):
line = []
line = text[i].split()
for l, w in enumerate(line):
# NOTE(review): the string literal on the next line is cut off in this copy
# of the file (no closing quote / parenthesis); restore it from the original.
line[l] = w.strip('.,!?();:*/\|<>-_%&
# line[0] raises IndexError for a blank line.
if line[0] == word:
words = []
for j in range(len(line)):
if j > 0:
words.append(line[j])
return(words)
def noun():
    """Return a random word from the 'существительное' entry of ``text``."""
    return random.choice(find_words('существительное', text))


def imperative():
    """Return a random word from the 'императив' entry of ``text``."""
    return random.choice(find_words('императив', text))


def adverb(imp):
    """Return a random 'наречие' word followed by a space and *imp*."""
    return random.choice(find_words('наречие', text)) + ' ' + imp


def verb():
    """Return a random word from the 'глагол' entry of ``text``."""
    return random.choice(find_words('глагол', text))


def adjective():
    """Return a random word from the 'прилагательное' entry of ``text``."""
    return random.choice(find_words('прилагательное', text))


def question_word():
    """Return a random word from the 'вопрос' entry of ``text``."""
    return random.choice(find_words('вопрос', text))
def pos_sentence():
    """Return a capitalised affirmative sentence: adj noun verb adj noun."""
    parts = [adjective(), noun(), verb(), adjective(), noun()]
    return (' '.join(parts) + '.').capitalize()


def neg_sentence():
    """Return a capitalised negative sentence (verb preceded by 'не')."""
    sentence = '{} {} не {} {} {}.'.format(
        adjective(), noun(), verb(), adjective(), noun())
    return sentence.capitalize()


def quest_sentence():
    """Return a capitalised question starting with a question word."""
    parts = [question_word(), adjective(), noun(), verb(), adjective(), noun()]
    return (' '.join(parts) + '?').capitalize()


def imper_sentence():
    """Return a capitalised imperative: adverb + imperative + noun + '!'."""
    sentence = '{} {}!'.format(adverb(imperative()), noun())
    return sentence.capitalize()


def if_sentence():
    """Return a capitalised conditional sentence ('если бы ..., то ... бы')."""
    sentence = 'если бы {} {} {}, то {} {} бы {}.'.format(
        noun(), verb(), noun(), noun(), verb(), noun())
    return sentence.capitalize()
def random_print():
    """Print one sentence of each kind, in random order, separated by spaces."""
    spisok = [pos_sentence(), neg_sentence(), quest_sentence(),
              imper_sentence(), if_sentence()]
    random.shuffle(spisok)
    # Each sentence is followed by a single space, including the last one.
    print(*spisok, end=' ')


text = open_file()
random_print()
# Print the letters of the entered word from last to first, one per line,
# leaving out every 'я' and 'з'.
word = input ('Введите слово: ')
for letter in reversed(word):
    if letter not in ('я', 'з'):
        print(letter)
import os
import re
def extensions():
    """Print the most frequent file extension under the current directory.

    The tree is walked recursively.  Files without an alphanumeric extension
    are ignored (they previously raised IndexError).  On a tie the extension
    seen first wins; an empty line is printed when nothing qualifies.
    """
    ext_pattern = re.compile(r'\.[a-z0-9A-Z]+$')
    ext_count = {}
    for root, dirs, files in os.walk('.'):
        for file in files:
            found = ext_pattern.search(file)
            if found is None:
                continue
            ext = found.group()
            ext_count[ext] = ext_count.get(ext, 0) + 1
    # max() returns the first maximal key in insertion order.
    found_ext = max(ext_count, key=ext_count.get) if ext_count else ''
    print(found_ext)
def main():
    """Entry point: report the most common extension."""
    extensions()


main()
import re
def open_and_read():
    """Return the full contents of animal_farm.txt (UTF-8) as one string."""
    with open("animal_farm.txt", 'r', encoding="utf-8") as source:
        return source.read()
def read_sentences(s):
    """Split *s* into sentences after '.', '!' or '?' that follows a
    lowercase Latin letter.

    The letter before the punctuation mark is now kept; the previous pattern
    captured only the mark, so the substitution dropped the last letter of
    every sentence.
    """
    marked = re.sub(r'([a-z])([.!?])', r'\1\2@@', s)
    return marked.split('@@')
# For every sentence in a, print each cleaned word as '<word>_<length>'.
def split_and_count(a):
for i in range(len(a)):
words = a[i].split()
# NOTE(review): the next line is cut off in this copy of the file (the strip
# argument and the rest of the list comprehension are missing).
words2 = [words[j].strip('.,!?();:*/\|<>-_%&
for word in range(len(words2)):
print('%s_%s' %(words2[word], len(words2[word])))
def main():
    """Read the book, split it into sentences and print word lengths."""
    split_and_count(read_sentences(open_and_read()))


main()
# Print the ratio of three-letter tokens to one-letter tokens in 1.txt.
word1 = 0
word3 = 0
# The context manager closes the file; the handle used to stay open.
with open("1.txt", 'r', encoding="utf-8") as f:
    for line in f:
        for token in line.split():
            if len(token) == 3:
                word3 += 1
            elif len(token) == 1:
                word1 += 1
if word1 == 0:
    print("Нет слов длинны 1")
else:
    print(float(word3)/float(word1))
# Ask for a file name, read that file as UTF-8, lower-case it and return the
# list of whitespace-separated tokens with surrounding punctuation stripped.
def open_and_edit():
name = input('Введите название файла: ')
f = open(name, 'r', encoding = "utf-8")
s = f.read()
f.close()
s1 = s.lower()
a = s1.split()
for i, word in enumerate(a):
# NOTE(review): the string literal on the next line is cut off in this copy
# of the file (no closing quote / parenthesis); restore it from the original.
a[i] = word.strip('.,!?();:*/\|<>-_%&
return a
def find_in_text(t):
    """Return the words of *t* ending in 'hood' (duplicates kept) and print
    how many were found."""
    hood = [word for word in t if word.endswith('hood')]
    print('В тексте нашлось ', len(hood), ' существительных с суффиксом -hood')
    return hood
def short_list(arr):
    """Return a flat ``[word, count, word, count, ...]`` frequency list.

    Words appear in order of first occurrence and *arr* is not modified.
    Empty entries are skipped.  The previous version never visited the last
    element, so a word occurring only at the end of *arr* (or a one-element
    list) was lost.
    """
    counts = {}
    for word in arr:
        if word:
            counts[word] = counts.get(word, 0) + 1
    short = []
    for word, count in counts.items():
        short.extend((word, count))
    return short
def min_freq(arr):
    """Print the word of *arr* with the lowest frequency.

    On a tie the word that occurs first wins.  Nothing is printed when
    short_list() yields no entries (this used to raise IndexError).
    """
    short = short_list(arr)
    if not short:
        return
    # short is [word, count, word, count, ...]; pair the two halves up.
    pairs = zip(short[0::2], short[1::2])
    rarest = min(pairs, key=lambda pair: pair[1])[0]
    print('Минимальную частотность имеет существительное', rarest)
def print_nouns(arr):
    """Print the distinct words of *arr* with every 'hood' removed,
    comma-separated."""
    nouns = [
        entry.replace('hood', '')
        for entry in short_list(arr)
        if not isinstance(entry, int)  # skip the interleaved counts
    ]
    print('Найденный слова образованы от существительных ', ', '.join(nouns))
def main():
    """Read a text, collect the -hood words and print the statistics."""
    found = find_in_text(open_and_edit())
    min_freq(found)
    print_nouns(found)


main()
# Script over aphor.txt (one quotation per line, read as UTF-8).
f = open("aphor.txt", 'r', encoding = "utf-8")
a = f.readlines()
f.close()
# Part 1: print every line that has fewer than 16 tokens, not counting '—'.
for i in range(len(a)):
words = []
words = a[i].split()
numb = 0
for j in range(len(words)):
if words[j] != '—':
numb += 1
if numb < 16:
print(a[i])
# Part 2: count occurrences of the token 'ум' and collect the distinct last
# tokens of the lines into author.
author = []
um = 0
for i in range(len(a)):
words = []
words = a[i].split()
for l, word in enumerate(words):
# NOTE(review): the string literal on the next line is cut off in this copy
# of the file (no closing quote / parenthesis); restore it from the original.
words[l] = word.strip('.,!?();:*/\|<>-_%&
for j in range(len(words)):
if words[j] == 'ум':
um += 1
povtor = 0
for k in range(len(author)):
if author[k] == words[len(words)-1]:
povtor += 1
if povtor == 0:
author.append(words[len(words)-1])
out = ''
out = ', '.join(author)
print('Количество цитат = ', um)
print('Источники: ', out)
# Part 3: read words until an empty line, then print the lines containing
# each entered word (or a not-found message).
inp_words = []
while True:
newword = input('Введите слово: ')
if newword == '':
break
else:
inp_words.append(newword)
for j in range(len(inp_words)):
found = 0
print(inp_words[j])
for i in range(len(a)):
words = []
words = a[i].split()
for l, word in enumerate(words):
# NOTE(review): truncated string literal again, same as in part 2.
words[l] = word.strip('.,!?();:*/\|<>-_%&
for k in range(len(words)):
if inp_words[j] == words[k]:
print(a[i])
found += 1
break
if found == 0:
print('Цитата с этим словом не найдена')
import re
def open_file():
    """Return the lines of file.txt (UTF-8), line endings included."""
    with open("file.txt", 'r', encoding="utf-8") as source:
        return list(source)
def open_new_file():
    """Open created_file.txt for writing (UTF-8) and return the handle.

    The caller is responsible for closing it.
    """
    return open("created_file.txt", 'w', encoding="utf-8")
# Count the items of a and write the counter, then a newline, to the open
# file f.
# NOTE(review): indentation is lost in this copy of the file, so it cannot be
# told whether the two writes run once after the loop (total number of lines)
# or on every iteration (running numbers) - confirm against the original.
def write_lines_number(a,f):
i = 0
for line in a:
i += 1
f.write(str(i))
f.write('\n')
def create_dictionary(a,f):
dic = {}
for line in a:
if '= 5:\n'))
while n < 5:
n = int(input('Введённое число < 5. Пожалуйста, введите число >= 5:\n'))
return n
def func3(dic, n):
    """Run *n* rounds of the guessing game.

    Each round picks a random key of *dic*, shows a random hint from its
    value and gives the player three attempts; after every miss except the
    last one another random hint is shown.
    """
    for i in range(n):
        key = random.choice(list(dic))
        print(i + 1, '-ое слово. ', 'Подсказка: ', random.choice(dic[key]), ' ...', sep='')
        for m in (3, 2, 1):
            print('Попыток осталось: ', m, sep='')
            if input('Введите ниже ваш ответ:\n').lower() == key:
                print('Молодец! Всё верно!')
                break
            print('Неверно. ', end='')
            if m != 1:
                print('Ещё одна подсказка: ', random.choice(dic[key]), ' ...', sep='')
        else:
            # All three attempts were used without a correct answer.
            print('Вы не угадали. Правильный ответ: ', key, sep='')
def main():
    """Load the dictionary, ask for the number of rounds and play."""
    dic = func1(input('Введите, пожалуйста, название файла:\n'))
    func3(dic, func2())


main()
# Read file name as UTF-8 (undecodable bytes ignored) and return its
# whitespace-separated tokens, lower-cased and stripped of edge characters.
def func1(name):
f = open(name, 'r', encoding = 'utf-8', errors = 'ignore')
words = f.read().replace('\n', ' ').split()
f.close()
for i, word in enumerate(words):
# NOTE(review): the string literal on the next line is cut off in this copy
# of the file (no closing quote / parenthesis); restore it from the original.
words[i] = word.lower().strip('.”“,/1234567890@
return words
def func3(words, word):
    """Print how often *word* occurs in *words*, remove every occurrence
    from the list in place and return that same list."""
    num = words.count(word)
    # Slice assignment keeps the caller's list object, as repeated pop() did.
    words[:] = [item for item in words if item != word]
    print(word, ': frequency = ', num, sep='')
    return words
def func2(words):
    """Report the frequency of every 'omni'-prefixed word and of its stem.

    func3() removes the counted words from *words*, so the loop runs over a
    de-duplicated snapshot taken beforehand.  Iterating the list while it
    was being shortened skipped elements.
    """
    prefix = 'omni'
    length = len(prefix)
    for word in list(dict.fromkeys(words)):
        if word.startswith(prefix) and length < len(word):
            words = func3(words, word)
            words = func3(words, word[length:])
            print('-------------------------------------')
def main():
    """Analyse the words of file.txt."""
    words = func1('file.txt')
    func2(words)


main()
# Read file name as UTF-8 (undecodable bytes ignored) and return its
# whitespace-separated tokens, lower-cased and stripped of edge characters.
def func1(name):
f = open(name, 'r', encoding = 'utf-8', errors = 'ignore')
words = f.read().replace('\n', ' ').split()
f.close()
for i, word in enumerate(words):
# NOTE(review): the string literal on the next line is cut off in this copy
# of the file (no closing quote / parenthesis); restore it from the original.
words[i] = word.lower().strip('.”“,/1234567890@
return words
def func3(words, word):
    """Print how many items of *words* equal *word*."""
    print(word, ': frequency = ', words.count(word), sep='')
def func2(words):
    """For each distinct 'under'-prefixed word (longer than the prefix) print
    its frequency and the frequency of the word without the prefix."""
    prefix = 'under'
    length = len(prefix)
    reported = set()
    for word in words:
        if word in reported or not word.startswith(prefix) or len(word) <= length:
            continue
        func3(words, word)
        func3(words, word[length:])
        print('-------------------------------------')
        reported.add(word)
def main():
    """Analyse the words of file.txt."""
    words = func1('file.txt')
    func2(words)


main()
import os
def files_and_folders():
    """Inspect the current directory.

    Returns a pair: a dict mapping each file name without its extension to
    the number of files sharing it, and a list of all non-file entries.
    """
    files, folders = [], []
    for entry in os.listdir('.'):
        (files if os.path.isfile(entry) else folders).append(entry)
    d_files = {}
    for entry in files:
        stem = os.path.splitext(entry)[0]
        d_files[stem] = d_files.get(stem, 0) + 1
    return d_files, folders
def counting(d_files):
    """Return the total count of files whose name (a key of *d_files*)
    contains at least one punctuation mark."""
    punct_marks = '.!?:;,-()"\'<>'
    return sum(
        amount
        for name, amount in d_files.items()
        if any(mark in name for mark in punct_marks)
    )
def output(num, d_files, d_folders):
    """Print the punctuation count followed by a numbered list of names.

    File names (keys of *d_files*) come first; folders are appended unless a
    file of the same name was already listed.
    """
    print('Количество файлов, названия которых содержит знаки препинания = ', num)
    print('Названия файлов и папок в данной папке следующие:')
    shown = 0
    for key in d_files:
        shown += 1
        print('%s) %s' % (shown, key))
    for key in d_folders:
        if key in d_files:
            continue
        shown += 1
        print('{}) {}'.format(shown, key))
def main():
    """Collect the directory listing, count punctuated names and print."""
    d_files, folders = files_and_folders()
    output(counting(d_files), d_files, folders)


if __name__ == '__main__':
    main()
import os
def walking():
    """Return how many folders under the current directory (itself included)
    hold at least two files with the same extension."""
    num = 0
    for root, dirs, files in os.walk('.'):
        found_exts = [os.path.splitext(name)[1] for name in files]
        # A repeated extension makes the set smaller than the list.
        if len(set(found_exts)) < len(found_exts):
            num += 1
    return num
def main():
    """Print the folder count computed by walking()."""
    print('Количество папок, в которых встречаются несколько файлов с одним\
и тем же расширением = {}.'.format(walking()))


if __name__ == '__main__':
    main()
import os
def walking():
    """Return how many folders under the current directory (itself included)
    hold at least two files with the same extension.

    For each such folder the first file that repeats an extension is printed.
    The counter used to be incremented for folders *without* a repeat, which
    contradicts the message main() prints for the result.
    """
    num = 0
    for root, dirs, files in os.walk('.'):
        seen = set()
        for file in files:
            file_ext = os.path.splitext(file)[1]
            if file_ext in seen:
                print(file)
                num += 1
                break
            seen.add(file_ext)
    return num
def main():
    """Print the folder count computed by walking()."""
    print('Количество папок, в которых встречаются несколько файлов с одним\
и тем же расширением = {}.'.format(walking()))


if __name__ == '__main__':
    main()
import re
def reading(name):
    """Return the contents of file *name* (UTF-8) split on single spaces."""
    with open(name, 'r', encoding='utf-8') as source:
        return source.read().split(' ')
# Lower-case every item of words in place, strip edge characters, and return
# the same list.
def cleaning(words):
for i, word in enumerate(words) :
# NOTE(review): the string literal on the next line is cut off in this copy
# of the file (no closing quote / parenthesis); restore it from the original.
words[i] = word.lower().strip('.,/1234567890@
return words
def printing(words):
    """Print each distinct word containing 'кот', in order of first
    appearance."""
    shown = set()
    for word in words:
        if 'кот' in word and word not in shown:
            shown.add(word)
            print(word)
def main():
    """Ask for a file name and print its 'кот' words."""
    raw = reading(input('Введите, пожалуйста, название файла:\n'))
    printing(cleaning(raw))


if __name__ == '__main__':
    main()
# Print a multiplication table (1..10) for an entered number, with a header
# row and a dashed separator line.
n = float(input('Введите любое число\n'))
print('число | ', n)
# The separator is 9 dashes plus the width of the largest product (10 * n).
for i in range(9 + len(str(round(n * 10, 3)))) :
print('-', end = '')
print()
for i in range(1,11) :
# NOTE(review): both branches print the same thing in this copy of the file;
# presumably the spacing differed for the two-digit row 10 - confirm.
if i != 10 :
print(i, ' | ', round(i * n, 3), end = '\n')
else :
print(i, ' | ', round(i * n, 3), end = '\n')
# Print the products of the entered number with 1..10, one per line.
n = float(input('Введите любое число\n'))
for factor in range(1, 11):
    print(factor, '*', n, '=', factor * n)
import re
def reading(name):
    """Return the lines of file *name* (UTF-8) as a list."""
    with open(name, 'r', encoding='utf-8') as source:
        return source.readlines()
# Join lines into one text, rewrite it with two substitutions, print it and
# return the (lemma, type, rest-of-line) tuples found by the last pattern.
def array(lines):
text = ''.join(lines)
# NOTE(review): as written this substitution replaces the text with itself,
# and the next pattern has an unbalanced '[wc]>' tail; the markup tags inside
# both patterns were presumably lost in this copy of the file - confirm.
text = re.sub('((.|\n)*)', '\\1', text)
text = re.sub('<[wc](.*?)>(.*?)[wc]>', '\\1 \\2', text)
print(text)
l = re.findall('lemma="(.*?)" type="(.*?)" (.*)', text)
return l
def recording1(d, n):
    """Ask for an output file name and append *n* followed by every key of
    *d*, one per line."""
    target = input('Введите, пожалуйста, название выходного файла\n')
    with open(target, 'a', encoding='utf-8') as f:
        f.write(str(n) + '\n')
        f.writelines(key + '\n' for key in d)
def recording2(d):
    """Ask for an output file name and append 'key - value' for every key of
    *d* that matches the pattern 'l.f.*'."""
    target = input('Введите, пожалуйста, название выходного файла\n')
    wanted = re.compile('l.f.*')
    with open(target, 'a', encoding='utf-8') as f:
        for key, value in d.items():
            if wanted.search(key):
                f.write(key + ' - ' + str(value) + '\n')
def recording3(l):
    """Keep asking until a name ending in '.csv' is given, then append each
    triple of *l* to that file as one comma-separated line."""
    prompt = 'Введите, пожалуйста, название выходного файла в формате csv\n'
    name = input(prompt)
    while not name.endswith('.csv'):
        name = input(prompt)
    with open(name, 'a', encoding='utf-8') as f:
        for elem in l:
            f.write(elem[0] + ',' + elem[1] + ',' + elem[2] + '\n')
def dictionary(lines):
    """Count how often each ``type="..."`` value occurs on lines that carry a
    ``lemma="..." type="..."`` pair."""
    pair = re.compile('lemma=".*" type="(.*)"')
    d = {}
    for line in lines:
        found = pair.search(line)
        if not found:
            continue
        key = found.group(1)
        d[key] = d.get(key, 0) + 1
    return d
def main():
    """Read the input file and produce the three output files."""
    lines = reading(input('Введите, пожалуйста, название входного файла\n'))
    d = dictionary(lines)
    recording1(d, len(lines))
    recording2(d)
    recording3(array(lines))


if __name__== '__main__':
    main()
import random
def _random_word(path):
    """Return a random whitespace-separated token of the UTF-8 file *path*."""
    with open(path, 'r', encoding='utf-8') as source:
        tokens = source.read().split()
    return random.choice(tokens)


def noun():
    """Return a random token from nouns.txt."""
    return _random_word('nouns.txt')


def personal_pronoun():
    """Return a random token from personal_pronouns.txt."""
    return _random_word('personal_pronouns.txt')


def adjective_before_noun():
    """Return a random token from adjectives_before_noun.txt."""
    return _random_word('adjectives_before_noun.txt')


def adjective_after_noun():
    """Return a random token from adjectives_after_noun.txt."""
    return _random_word('adjectives_after_noun.txt')


def adverb():
    """Return a random token from adverbs.txt."""
    return _random_word('adverbs.txt')


def intensifier(adv):
    """Return a random token from intensifiers.txt, a space and *adv*."""
    return _random_word('intensifiers.txt') + ' ' + adv


def transitive_infinitive():
    """Return a random token from transitive_infinitives.txt."""
    return _random_word('transitive_infinitives.txt')


def intransitive_infinitive():
    """Return a random token from intransitive_infinitives.txt."""
    return _random_word('intransitive_infinitives.txt')


def temporary_marker():
    """Return a random token from temporary_markers.txt."""
    return _random_word('temporary_markers.txt')


def interrogative():
    """Return a random token from interrogatives.txt."""
    return _random_word('interrogatives.txt')


def number():
    """Return a random token from numbers.txt."""
    return _random_word('numbers.txt')
def declension(noun, adjective, number):
    """Return ``(noun form, adjective form, article)`` for *number*.

    Both declension files hold one entry per line: a key followed by
    space-separated fields.  The first noun field is the gender ('m' / 'f'),
    the second the plural; the adjective fields are indexed 0, 1 and 2 for
    feminine singular, masculine plural and feminine plural.  The article is
    chosen at random.  Falls through (returns None) for any other
    gender / number combination.
    """
    nouns = {}
    adjectives = {}
    with open('declension_of_nouns.txt', 'r', encoding='utf-8') as f, \
            open('declension_of_adjectives.txt', 'r', encoding='utf-8') as g:
        for table, source in ((nouns, f), (adjectives, g)):
            for line in source:
                s = line.split(' ', maxsplit=1)
                table[s[0]] = s[1].split()
    gender = nouns[noun][0]
    if gender == 'm' and number == 'sg':
        return noun, adjective, random.choice(['le', 'un'])
    if gender == 'm' and number == 'pl':
        return nouns[noun][1], adjectives[adjective][1], random.choice(['les', 'des'])
    if gender == 'f' and number == 'sg':
        return noun, adjectives[adjective][0], random.choice(['la', 'une'])
    if gender == 'f' and number == 'pl':
        return nouns[noun][1], adjectives[adjective][2], random.choice(['les', 'des'])
def collocation_bef(noun, adj_before_noun, article):
    """Return 'article adjective noun' (adjective placed before the noun)."""
    return ' '.join((article, adj_before_noun, noun))
def collocation_aft(noun, adj_after_noun, article):
    """Return 'article noun adjective' (adjective placed after the noun)."""
    return ' '.join((article, noun, adj_after_noun))
def conjugation(pronoun, infinitive):
    """Return the form of *infinitive* that agrees with *pronoun*.

    conjugations.txt holds one verb per line: the infinitive followed by six
    space-separated forms.  Any pronoun not listed below uses the sixth form.
    """
    verbs = {}
    with open('conjugations.txt', 'r', encoding='utf-8') as f:
        for line in f:
            s = line.split(' ', maxsplit=1)
            verbs[s[0]] = s[1].split()
    slots = {'je': 0, 'tu': 1, 'il': 2, 'elle': 2, 'nous': 3, 'vous': 4}
    return verbs[infinitive][slots.get(pronoun, 5)]
def affirmative_sequence(pronoun, verb):
    """Return 'pronoun verb', eliding 'je' to "j'" before a vowel."""
    starts_with_vowel = verb[0] in 'aàâeéèêiîoôuùûy'
    if starts_with_vowel and pronoun == 'je':
        return "j'" + verb
    return pronoun + ' ' + verb
def interrogative_sequence(pronoun, verb):
    """Return the inverted 'verb-pronoun' form, inserting '-t-' when the
    verb ends in a vowel and the pronoun starts with one."""
    vowels = 'aàâeéèêiîoôuùûy'
    joint = '-t-' if verb[-1] in vowels and pronoun[0] in vowels else '-'
    return verb + joint + pronoun
def affirmative_sentence():
    """Return an affirmative sentence: subject + verb + singular object
    + ' et ' + number + plural adjective and noun."""
    pron = personal_pronoun()
    noun1, adj1, art1 = declension(noun(), adjective_before_noun(), 'sg')
    noun2, adj2, art2 = declension(noun(), adjective_before_noun(), 'pl')
    subject_verb = affirmative_sequence(pron, conjugation(pron, transitive_infinitive()))
    first_object = collocation_bef(noun1, adj1, art1)
    return '{} {} et {} {} {}.'.format(subject_verb, first_object, number(), adj2, noun2)


def interrogative_sentence():
    """Return a question: interrogative + inverted verb + time marker."""
    pron = personal_pronoun()
    return '{} {} {}?'.format(
        interrogative(),
        interrogative_sequence(pron, conjugation(pron, intransitive_infinitive())),
        temporary_marker())


def negative_sentence():
    """Return a negative sentence with a plural subject and singular object."""
    noun1, adj1, art1 = declension(noun(), adjective_before_noun(), 'pl')
    noun2, adj2, art2 = declension(noun(), adjective_before_noun(), 'sg')
    return '{} ne {} pas {} {} {}.'.format(
        collocation_aft(noun1, adj1, art1),
        conjugation('elle', transitive_infinitive()),
        collocation_bef(noun2, adj2, art2),
        temporary_marker(),
        intensifier(adverb()))


def conditional_sentence():
    """Return the fixed placeholder used instead of a conditional sentence."""
    return '[Здесь должно быть условное предложение, но я пока не представляю, как оно устроено во французском :( ].'


def imperative_sentence():
    """Return a negative imperative built on the 'vous' form."""
    return 'ne {} pas!'.format(conjugation('vous', intransitive_infinitive()))
def random_sentence(n):
    """Return the sentence of kind *n*: 1 affirmative, 2 interrogative,
    3 negative, 4 conditional, anything else imperative."""
    builders = {
        1: affirmative_sentence,
        2: interrogative_sentence,
        3: negative_sentence,
        4: conditional_sentence,
    }
    return builders.get(n, imperative_sentence)()
def text_print():
    """Print one capitalised sentence of each kind, separated by spaces, in
    the iteration order of a set of the digits '1'..'5'."""
    for digit in set('12345'):
        sentence = random_sentence(int(digit))
        print(sentence.capitalize(), end=' ')


text_print()
import re
def reading(name):
    """Return the whitespace-separated tokens of file *name* (UTF-8)."""
    with open(name, 'r', encoding='utf-8') as source:
        content = source.read()
    return content.replace('\n', ' ').split()
# Lower-case every item of words in place, strip edge characters, and return
# the same list.
def cleaning(words):
for i, word in enumerate(words) :
# NOTE(review): the string literal on the next line is cut off in this copy
# of the file (no closing quote / parenthesis); restore it from the original.
words[i] = word.lower().strip('.,/1234567890@
return words
def printing(words):
    """Print every word that ends in a form of the verb 'загрузить'."""
    # The pattern is anchored at the end only, so prefixed words match too.
    verb_form = re.compile('загру(з(ят(ся)?|и(шь(ся)?|(сь)?|м(ся)?|л((ся)?|а(сь)?|и(сь)?|о(сь)?)|т((ся)?|е(сь)?|ь(ся)?)|в(ш(ую(ся)?|ая(ся)?|е(го(ся)?|му?(ся)?|й(ся)?|е(ся)?|ю(ся)?)|и((сь)?|й(ся)?|м(и)?(ся)?|е(ся)?|х(ся)?)))?))|ж(у(сь)?|ен(а|о|ы)?|ён|(е|ё)нн(ая|ую|о(м(у)?|ю|е|го|й)|ы(м(и)?|й|е|х))))$')
    for word in words:
        if verb_form.search(word):
            print(word)
def main():
    """Ask for a file name and print the verb forms found in it."""
    raw = reading(input('Введите, пожалуйста, название файла:\n'))
    printing(cleaning(raw))


main()
import re
def reading(name):
    """Return the full contents of file *name* (UTF-8) as one string."""
    with open(name, 'r', encoding='utf-8') as source:
        return source.read()
def find(text):
r = re.search('\= 0 :
bigw += 1
else :
j = 0
while j < len(l[i]) and alph.find(l[i][j]) == -1 :
j += 1
if j == len(l[i]) :
allw -= 1
elif alphUP.find(l[i][j]) >= 0 :
bigw += 1
if allw != 0 :
print('The percentage of words, which start with uppercase equals to ', round(bigw / allw * 100, 3), '%', sep = '')
else :
if fl == 0 :
print('There are no words at all! Try to use another file.')
else :
print('There are some symbols, but no words in Russian. Try to use another file!')
f.close()
# Keep asking until exactly one word is entered (spaces around it are
# allowed), then print the word and each of its shorter suffixes.
# fl becomes 1 once an acceptable word is stored in word.
fl = 0
while fl != 1 :
word = input('Please input one word:\n')
ind = word.find(' ')
# No space at all: the input is accepted as it is.
if ind == -1 :
fl = 1
else :
# The input starts with a space: drop the leading spaces first.
if ind == 0 :
word = word[1:]
flag = word.find(' ')
while flag == 0 :
word = word[1:]
flag = word.find(' ')
# A space remains after the word: everything after it must be spaces only.
if flag > 0 :
subword = word[flag:]
ind = subword.find(' ')
while ind == 0 :
subword = subword[1:]
ind = subword.find(' ')
if subword != '' :
print('There is more than one word. Please try again!')
else :
word = word[:flag]
fl = 1
else :
# Nothing but spaces was typed when word is empty here.
if word != '' :
fl = 1
else :
print("You didn't type any word! Please try again!")
else :
# The first space comes after the word: the tail must be spaces only.
subword = word[ind:]
flag = subword.find(' ')
while flag == 0 :
subword = subword[1:]
flag = subword.find(' ')
if subword != '' :
print('There is more than one word. Please try again!')
else :
word = word[:ind]
fl = 1
# Print the suffixes starting at each position of the word.
for i in range(len(word)) :
print(word[i:])
# Keep asking until exactly one non-empty word (surrounding whitespace
# ignored) is entered, then print the word and each of its shorter suffixes.
while True:
    word = input('Please input one word:\n').strip()
    if ' ' in word:
        print('There is more than one word. Please try again!')
    elif word == '':
        print("You didn't type any word! Please try again")
    else:
        break
for start in range(len(word)):
    print(word[start:])
# Read three floats and print YES only when c is both the remainder and the
# quotient of a divided by b.
a = float(input('enter the first number\n'))
b = float(input('enter the second number\n'))
c = float(input('enter the third number\n'))
if b == 0.:
    print('you can\'t divide by zero')
else:
    both_hold = (a % b == c) and (a / b == c)
    print('YES' if both_hold else 'NO')
# Read three floats and report separately whether c equals the remainder and
# whether c equals the quotient of a divided by b.
a = float(input('enter the first number\n'))
b = float(input('enter the second number\n'))
c = float(input('enter the third number\n'))
if b == 0.:
    print('you can\'t divide by zero')
else:
    print('YES, a % b == c' if a % b == c else 'NO, a % b != c')
    print('YES, a / b == c' if a / b == c else 'NO, a / b != c')
# Read three integers and print YES only when c is both the remainder and
# the quotient of a divided by b.
a = int(input('enter the first number\n'))
b = int(input('enter the second number\n'))
c = int(input('enter the third number\n'))
if b == 0:
    print('you can\'t divide by zero')
else:
    both_hold = (a % b == c) and (a / b == c)
    print('YES' if both_hold else 'NO')
# Read three integers and report separately whether c equals the remainder
# and whether c equals the quotient of a divided by b.
a = int(input('enter the first number\n'))
b = int(input('enter the second number\n'))
c = int(input('enter the third number\n'))
if b == 0:
    print('you can\'t divide by zero')
else:
    print('YES, a % b == c' if a % b == c else 'NO, a % b != c')
    print('YES, a / b == c' if a / b == c else 'NO, a / b != c')
import re
import os
import csv
def printing(d1, d2, arr):
    """Write the three result files (all cp1251).

    output1.txt - ``key<TAB>value`` for every item of *d1*, sorted by key.
    output2.csv - header row plus ``key;value[0];value[1]`` for every item
                  of *d2*, sorted by key.
    output3.txt - every element of *arr* on its own line.
    """
    with open('output1.txt', 'w', encoding='cp1251') as f:
        for key, value in sorted(d1.items()):
            f.write(key + '\t' + str(value) + '\n')
    # newline='' is required for csv writers; without it the '\r\n' row
    # terminator is translated again on Windows and blank rows appear.
    with open('output2.csv', 'w', encoding='cp1251', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow(['Название файла', 'Автор', 'Дата создания текста'])
        for key, value in sorted(d2.items()):
            writer.writerow([str(key), str(value[0]), str(value[1])])
    with open('output3.txt', 'w', encoding='cp1251') as f:
        for elem in arr:
            f.write(elem + '\n')
# Process every file in directory name (read as cp1251) and return
# (d1, d2, arr): d1 maps file name to a match count, d2 is returned as
# created, arr collects sentence strings built around adjacent word pairs.
# NOTE(review): several patterns below look like they lost their markup tags
# in this copy of the file - confirm against the original.
def dictionary(name):
d1 = {}
# NOTE(review): d2 is never filled in this block, so it is returned empty.
d2 = {}
arr = []
for file in os.listdir(name):
with open(os.path.join(name, file), 'r', encoding = 'cp1251') as text:
text = text.read()
a = re.findall('(.*?)', text)
d1[file] = len(a)
b = re.findall('<.*?>(.*?)([\s,.!123456790:;?""])', text)
# NOTE(review): words_punct is not defined anywhere in this block; the result
# of the findall above is stored in b - presumably the same data, confirm.
words = [words_punct[i][0] for i in range(len(words_punct))]
puncts = [words_punct[i][1] for i in range(len(words_punct))]
d = re.findall('gr="A.*?gen.*?>(.*?)\s.*?gr="S.*?gen.*?>(.*?)', text)
e = []
for i, elem in enumerate(d):
# list.index returns the first occurrence only and raises ValueError when
# the word is absent.
ind1 = words.index(elem[0])
ind2 = words.index(elem[1])
# Only pairs that are adjacent in the word list are used.
if ind2 - ind1 == 1:
# Walk left and right to the nearest '.', '?' or '!' (also '[' / ']').
t = ind1 - 1
while t >= 0 and puncts[t] not in '[.?!]':
t -= 1
k = ind2
while k <= len(words) - 1 and puncts[k] not in '[.?!]':
k += 1
s = ''
for p in range(t + 1, k):
if p != ind1 and p != ind2:
s += words[p] + puncts[p]
elif p == ind1:
s += '\t' + words[p] + puncts[p]
else:
# NOTE(review): appends the letter 't'; a tab ('\t') may have been intended.
s += words[p] + puncts[p] + 't'
e.append(s)
arr.extend(e)
return d1, d2, arr
def main():
    """Process the 'news' folder and write the result files."""
    printing(*dictionary('news'))


if __name__ == '__main__':
    main()
import re
# Read input.txt (UTF-8), remove sentence-internal punctuation that should
# not end a sentence, split the text on '.', '!' and '?' and return the
# list of sentences.
def reading():
f = open('input.txt', 'r', encoding = 'utf-8')
text = f.read()
f.close()
# An ellipsis counts as a single full stop; dashes are dropped.
text = text.replace('...', '.')
text = text.replace('—', '')
# A mark directly followed by a lowercase letter becomes a space.
text = re.sub('[\.!\?]([а-яa-z])', ' \\1', text)
# Same, allowing closing/opening brackets and quotes in between.
text = re.sub('[\.!\?]\)?»? ?«?\(?([а-яa-z])', ' \\1', text)
# A full stop glued to a following capital letter becomes a space.
text = re.sub('\.([A-ZА-Я])', ' \\1', text)
# The full stop after a single capital followed by another capital is removed.
text = re.sub('([A-ZА-Я])\. ([A-ZА-Я])', '\\1 \\2', text)
sentences = re.split(r'[.!?]', text)
# NOTE(review): the next line is cut off in this copy of the file (the strip
# argument and the rest of the comprehension are missing).
sentences = [' '.join([word.strip('» «\n:<>\'"@
return sentences
def output(sentences):
    """Append to output.txt one line for every sentence longer than ten
    words, giving the sentence and its average word length."""
    with open('output.txt', 'a', encoding='utf-8') as f:
        for sentence in sentences:
            tokens = sentence.split()
            if len(tokens) <= 10:
                continue
            total = sum(len(token) for token in tokens)
            f.write('"{}": это предложение со словами длины {:.1f}\n'.format(sentence, total / len(tokens)))
def main():
    """Split the input text into sentences and report the long ones."""
    output(reading())


if __name__ == '__main__':
    main()
# Read seven integers and draw one row of 'X' characters for each of them.
print('Введите 7 целых чисел')
arr = []
for i in range(1, 8):
    print('Введите ', i, '-ое целое число', sep='')
    arr.append(int(input()))
for amount in arr:
    # A non-positive number produces an empty row.
    print('X' * amount)
import re
def reading(name):
    """Return the full contents of file *name* (UTF-8) as one string."""
    with open(name, 'r', encoding='utf-8') as source:
        return source.read()
def correction(text):
    """Return *text* with forms of 'Финляндия' replaced by the matching forms
    of 'Малайзия'.

    Capitalised, lower-case and all-caps spellings are handled; the case
    ending matched by the pattern is carried over unchanged.  The first
    letter is now chosen by a replacement function; the earlier version
    marked it with '@' and then rewrote every 'Ф@' / 'ф@' in the text, which
    also altered such sequences that were already present in the input.
    """
    def swap_stem(match):
        first = 'М' if match.group(1) == 'Ф' else 'м'
        return first + 'алайзи' + match.group(2)

    corrected_text = re.sub('(Ф|ф)инлянди( |я(х|(ми?))?|и|й|ю|е(й|ю))', swap_stem, text)
    corrected_text = re.sub('ФИНЛЯНДИ( |Я(Х|(МИ?))?|И|Й|Ю|Е(Й|Ю))', 'МАЛАЙЗИ\\1', corrected_text)
    return corrected_text
def recording(text):
    """Ask for an output file name and write *text* to it (UTF-8),
    replacing any existing contents."""
    target = input('Введите, пожалуйста, название файла вывода:\n')
    with open(target, 'w', encoding='utf-8') as f:
        f.write(text)
def main():
    """Read the input file, apply the replacement and save the result."""
    source_text = reading(input('Введите, пожалуйста, название файла ввода:\n'))
    recording(correction(source_text))


if __name__ == '__main__':
    main()
import re
def open_text(way_to_file):
    """Return the full contents of *way_to_file* (UTF-8) as one string."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        return source.read()
def search(text):
    """Find inflected forms of 'динозавр' in *text*.

    Returns what ``re.findall`` yields for a pattern with three groups: one
    tuple per match holding the ending, the inner plural part (or '') and
    the inner 'м'/'в' (or '').  The first alternative of the ending now uses
    the Cyrillic letter 'а'; it was typed as a Latin 'a', so forms such as
    'динозавра' or 'динозаврами' never matched.
    """
    return re.findall(r'\bдинозавр(а(ми|х)?|у|о(м|в)|е|ы)', text)
def tags(text):
    """Return *text* with every '<...>' span removed; a span may run across
    line breaks."""
    tag = re.compile('<.*?>', flags=re.DOTALL)
    return tag.sub('', text)
def replace(text):
    """Return *text* with 'динозавр' at the start of a word replaced by
    'кот'."""
    stem = re.compile('\\bдинозавр', flags=re.DOTALL)
    return stem.sub('кот', text)
# NOTE(review): images() looks unfinished - re.sub is called with the pattern
# only (it also needs a replacement and a string, so the call raises
# TypeError), the result n is not used, and nothing calls the function here.
def images(text):
n = re.sub('(а|е|ё|и|оуэюя)')
# Script part: read a file name from the user, replace the word stem in the
# file's text via replace() and print the result.
fname = input()
txt = open_text(fname)
res = replace(txt)
print(res)
import re
import os
def auth(direct):
    """Map each file name under *direct* to its author meta value.

    The tree is walked recursively.  The value is taken from a
    ``content="..." name="author"`` attribute pair; if a file has several,
    the last one wins, and files without one are left out.

    Fixes: files are opened relative to the folder being walked (``root``),
    not always relative to *direct*, so files in sub-folders are found; the
    value is captured up to the closing quote instead of greedily to the
    end of the line.
    """
    author_meta = re.compile('content="([^"]*)" name="author"')
    d = {}
    for root, dirs, files in os.walk(direct):
        for file in files:
            with open(os.path.join(root, file)) as f:
                text = f.read()
            for value in author_meta.findall(text):
                d[file] = value
    return d
def topic(direct):
    """Map each file name under *direct* to its topic meta value.

    The tree is walked recursively.  The value is taken from a
    ``content="..." name="topic"`` attribute pair; if a file has several,
    the last one wins, and files without one are left out.

    Fixes: files are opened relative to the folder being walked (``root``),
    not always relative to *direct*, so files in sub-folders are found; the
    value is captured up to the closing quote instead of greedily to the
    end of the line.
    """
    topic_meta = re.compile('content="([^"]*)" name="topic"')
    d = {}
    for root, dirs, files in os.walk(direct):
        for file in files:
            with open(os.path.join(root, file)) as f:
                text = f.read()
            for value in topic_meta.findall(text):
                d[file] = value
    return d
def main():
    """Write ``name author topic`` for every file with an author to 15.csv.

    Each record starts on a new line.  A file that has an author but no
    topic gets an empty topic instead of raising KeyError.
    """
    direct = './news'
    d1 = auth(direct)
    d2 = topic(direct)
    with open('./15.csv', 'w', encoding='utf-8') as f:
        for key, author in d1.items():
            f.write('\n{} {} {}'.format(key, author, d2.get(key, '')))


main()
def open_text(way_to_file):
    """Return the lower-cased words of *way_to_file* (UTF-8) with common
    punctuation stripped from both ends."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        tokens = source.read().lower().split()
    return [token.strip(',.;:!?\n ') for token in tokens]
def first_letter(letter, way_to_file):
    """Return the words of *way_to_file* whose first character is *letter*.

    Words come from open_text(), which can yield empty strings (a token made
    only of punctuation); slicing instead of indexing keeps those from
    raising IndexError.
    """
    return [elem for elem in open_text(way_to_file) if elem[:1] == letter]
def questions():
    """Read a letter and then a file name from the user and return the
    matching words."""
    letter = input()
    fname = input()
    return first_letter(letter, fname)


result = questions()
print(result)
import re
import os
def number_sent(direct):
    """Map each file name under *direct* to the number of ``regex`` matches
    in that file.

    Fix: files are opened relative to the folder being walked (``root``),
    not always relative to *direct*, so files in sub-folders are found.

    NOTE(review): ``regex`` is an empty pattern in this copy of the file, so
    the count is simply ``len(text) + 1``; the original pattern was
    presumably lost - confirm.
    """
    regex = ''
    d = {}
    for root, dirs, files in os.walk(direct):
        for file in files:
            with open(os.path.join(root, file)) as f:
                text = f.read()
            d[file] = len(re.findall(regex, text))
    return d
def main():
    """Write ``name count`` for every file of ./news to 11.txt, each record
    starting on a new line."""
    d = number_sent('./news')
    with open('./11.txt', 'w', encoding='utf-8') as f:
        for key, amount in d.items():
            f.write('\n{} {}'.format(key, amount))


main()
import random
def imperative():
    """Return a random imperative phrase."""
    options = ["прокати", "уходи", "не спеши", "погоди", "подожди", "позвони", "убегай", "не плошай", "подержи"]
    return random.choice(options)


def verb():
    """Return a random plural verb form."""
    options = ["привезут", "принесли", "принесут", "пожуют", "погрызут", "приплетут", "приведут", "привели"]
    return random.choice(options)


def noun_phrase():
    """Return a random clitic followed by a random two-syllable word."""
    clitics = ["по", "ни", "на", "хоть", "лишь", "вот", "не", "от", "за", "пусть"]
    words2 = ["себе", "тебе", "земля", "игра", "звезда", "мороз", "ответ", "превед", "футбол", "печаль", "бокал"]
    # The clitic is drawn first, then the word.
    clitic = random.choice(clitics)
    return clitic + ' ' + random.choice(words2)


def noun(number):
    """Return a random singular noun when *number* is 's', otherwise a random
    plural noun."""
    singular_nouns = ["монолог", "коридор", "почему", "потому", "отчего", "каратэ", "кабарэ", "курага", "кандидат"]
    plural_nouns = ["малыши", "рукава", "камыши", "табуны", "рюкзаки", "пиджаки", "пацаны", "чуваки"]
    pool = singular_nouns if number == 's' else plural_nouns
    return random.choice(pool)


def punctuation():
    """Return a random sentence-final mark."""
    return random.choice([".", "?", "!", "..."])
def verse1():
    """Return a line of the form: plural noun, verb, plural noun, mark."""
    return '{} {} {}{}'.format(noun('pl'), verb(), noun('pl'), punctuation())


def verse2():
    """Return a line of the form: imperative, singular noun, noun phrase,
    mark."""
    return '{} {} {}{}'.format(imperative(), noun('s'), noun_phrase(), punctuation())


def verse3():
    """Return a line of the form: noun phrase, verb, plural noun, mark."""
    return '{} {} {}{}'.format(noun_phrase(), verb(), noun('pl'), punctuation())


def make_verse():
    """Return a line built from one of the three patterns, picked at random."""
    verse = random.choice([1,2,3])
    builders = {1: verse1, 2: verse2}
    return builders.get(verse, verse3)()


# Print a four-line poem.
for n in range(4):
    print(make_verse())
# Print, one per line, the letters standing at odd positions (1st, 3rd, ...)
# of the entered word that are 'п', 'о' or 'е'.
word=input()
for elem in word[::2]:
    if elem in 'пое':
        print(elem)
# Read a message and rebuild it letter by letter in result before printing.
# NOTE(review): indentation is lost in this copy of the file, so it cannot be
# told whether print runs once after the loop (whole message) or on every
# iteration (growing prefixes) - confirm against the original.
message=input('Введите слово или сообщение: ')
result=''
for letter in message:
result += letter
print(result)
import re
def open_text(way_to_file):
    """Return the lower-cased words of *way_to_file* (UTF-8) with common
    punctuation stripped from both ends."""
    with open(way_to_file, 'r', encoding='utf-8') as source:
        tokens = source.read().lower().split()
    return [token.strip(',.;:!?\n ') for token in tokens]
def main():
    """Read a file name from the user and return the distinct words of that
    file which contain a form of the verb 'найти', in order of first
    appearance."""
    reglex = 'на(й(ти|д(я|ут?|((е|ё)(шь|т|м|те)|ен(а|о|ы)?)))|ш((е|ё)л|л(а|о|и))|шедш(е(е|й|го|му?)|ая|ую|и(й|е|х|ми?))|йденн(о(е|го|ому?)|ая|ой|ую|ы(й|е|х|ми?))(с(ь|я))?)'
    fname = input()
    # A dict keeps insertion order and gives constant-time membership tests.
    unique = {}
    for elem in open_text(fname):
        if re.search(reglex, elem) is not None:
            unique.setdefault(elem, None)
    return list(unique)


result = main()
print(result)
import random
def organising_array(way_to_file):
    """Return the contents of *way_to_file* (UTF-8) split on newlines.

    A trailing newline in the file yields a final empty string.  The file is
    now closed after reading; the handle used to be left open on every call.
    """
    with open(way_to_file, 'r', encoding='utf-8') as f:
        return f.read().split('\n')
def noun_phrase():
adjectives = organising_array('./1.txt')
adjective = random.choice(adjectives)
nouns = organising_array('./2.txt')
noun = random.choice(nouns)
return adjective + ' ' + noun
def clause():
clauses = organising_array('./3.txt')
return random.choice(clauses)
def adverb():
adverbs = organising_array('./4.txt')
return random.choice(adverbs)
def clause2():
clitics = organising_array('./5.txt')
clitic = random.choice(clitics)
pronouns = organising_array('./6.txt')
pronoun = random.choice(pronouns)
verbs = organising_array('./7.txt')
verb = random.choice(verbs)
return clitic + ' ' + pronoun + ' ' + verb
def objects():
objects = organising_array('./8.txt')
return random.choice(objects)
def patient():
patients = organising_array('./9.txt')
return random.choice(patients)
def verb():
verbs = organising_array('./10.txt')
return random.choice(verbs)
def praep_phrase():
    """Return a random line of ./11.txt and a random line of ./12.txt joined by a space."""
    head = random.choice(organising_array('./11.txt'))
    tail = random.choice(organising_array('./12.txt'))
    return head + ' ' + tail
def adjective():
adjectives = organising_array('./13.txt')
return random.choice(adjectives)
def punctuation():
    """Return a random line of ./14.txt."""
    return random.choice(organising_array('./14.txt'))
def verse1():
    """Build a verse: noun phrase, punctuation, a space, clause, punctuation."""
    pieces = [noun_phrase(), punctuation(), ' ', clause(), punctuation()]
    return ''.join(pieces)
def verse2():
return adverb() + ' ' + clause2() + ' ' + objects() + punctuation()
def verse3():
return patient() + ' ' + verb() + ' ' + praep_phrase() + ' ' + adjective() + punctuation()
def make_verse():
verse = random.choice([1,2,3])
if verse == 1:
return verse1()
elif verse == 2:
return verse2()
else:
return verse3()
for n in range(4):
print(make_verse())
import re
def open_text(way):
    """Read a UTF-8 file and return its text without the characters , . : ; ! ? - " ( )."""
    with open(way, 'r', encoding = 'utf-8') as source:
        raw = source.read()
    # One deletion table replaces the chain of single-character replace() calls.
    removal_table = str.maketrans('', '', ',.:;!?-"()')
    return raw.translate(removal_table)
def search(text):
    """Return all non-overlapping runs of seven space-separated words whose fourth word ends in 'аго'."""
    regex = '\\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+аго\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b'
    pattern = re.compile(regex, flags = re.DOTALL)
    return pattern.findall(text)
def write(fname, m):
    """Append every item of ``m`` to the UTF-8 file ``fname``, one item per line."""
    with open(fname, 'a', encoding = 'utf-8') as out:
        out.writelines(item + '\n' for item in m)
way = input()
fname = input()
text = open_text(way)
m = search(text)
write(fname, m)
def open_text(way):
    """Read a UTF-8 file and return its lower-cased tokens with ',.;:!?-' stripped from both ends."""
    with open(way, 'r', encoding = 'utf-8') as source:
        lowered = source.read().lower()
    return [token.strip(',.;:!?-') for token in lowered.split()]
def main():
fname = input()
arr = open_text(fname)
n = len(arr)
return n
res = main()
print('В файле содержится ', res, ' слов')
def open_text(way):
with open(way, 'r', encoding = 'utf-8') as f:
text = f.read()
text = text.lower()
arr = text.split()
for index, elem in enumerate(arr):
arr[index] = elem.strip(',.;:!?-"')
return arr
def freq_list(arr):
    """Return a dict mapping each element of ``arr`` to its number of occurrences."""
    counts = {}
    for token in arr:
        counts[token] = counts.get(token, 0) + 1
    return counts
def sort(d):
    """Return the keys of ``d`` as a list in ascending order.

    The previous hand-written swap loop overwrote entries of its working
    list and could lose keys (for keys 'b', 'c', 'a' it returned
    ['a', 'b']). The built-in ``sorted`` keeps every key.
    """
    return sorted(d.keys())
def write(fname, d, arr):
    """Append one ``key,count`` line per element of ``arr`` to the UTF-8 file ``fname``.

    The count for each key is looked up in ``d``.
    """
    with open(fname, 'a', encoding = 'utf-8') as out:
        for key in arr:
            out.write(key + ',' + str(d[key]) + '\n')
fname = input()
fname2 = input()
arr = open_text(fname)
d = freq_list(arr)
a = sort(d)
write(fname2, d, a)
import re
def open_text(way):
    """Read a UTF-8 file and return its whitespace-separated words without punctuation.

    The characters , . : ; ! ? - " ( ) are deleted before splitting.
    The previous ``re.sub(',.:;!?-"()\\n', ...)`` treated the list as a
    literal regular expression rather than a character set, so nothing
    was removed. Newlines are intentionally left in place: ``split()``
    already treats them as separators, and deleting them would glue the
    last word of a line to the first word of the next.
    """
    with open(way, 'r', encoding = 'utf-8') as f:
        text = f.read()
    text = text.translate(str.maketrans('', '', ',.:;!?-"()'))
    return text.split()
def search(arr):
    """Return every 7-word window of ``arr`` whose fourth word ends in 'аго'.

    Each window is the seven consecutive words joined by single spaces.
    The loop now runs to ``len(arr) - 7`` inclusive; the previous bound
    skipped the final window, so a text of exactly seven words was never
    examined.
    """
    regex = '\\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+аго\\b \\b[\\w]+\\b \\b[\\w]+\\b \\b[\\w]+\\b'
    window = 7
    result = []
    for i in range(len(arr) - window + 1):
        current = ' '.join(arr[i:i + window])
        if re.search(regex, current) is not None:
            result.append(current)
    return result
def write(fname, m):
with open(fname, 'a', encoding = 'utf-8') as f:
for elem in m:
f.write(elem)
f.write('\n')
way = input()
fname = input()
text = open_text(way)
m = search(text)
write(fname, m)
import random
def opening_csv(way_to_file):
    """Read a comma-separated UTF-8 file into a dict of first field -> second field.

    Lines with fewer than two fields (for example the empty string
    produced by a trailing newline) are skipped; previously they raised
    IndexError.
    """
    with open(way_to_file, 'r', encoding = 'utf-8') as f:
        text = f.read()
    d = {}
    for line in text.split('\n'):
        fields = line.split(',')
        if len(fields) < 2:
            continue
        d[fields[0]] = fields[1]
    return d
def random_key(d):
    """Return a randomly chosen key of the dict ``d``."""
    candidates = list(d.keys())
    return random.choice(candidates)
def main():
    """Run one round of the guessing game and return 'WIN!!!' or 'FAIL((('.

    Reads the vocabulary file name from input, prints a random key and
    one underscore per character of its value, then compares the next
    input with that value.
    """
    way_to_file = input()
    vocabul = opening_csv(way_to_file)
    word = random_key(vocabul)
    print(word)
    answer = vocabul[word]
    for _ in range(len(answer)):
        print('_', end = ' ')
    solve = input()
    return 'WIN!!!' if solve == answer else 'FAIL((('
res = main()
print(res)
import re
def open_text(way_to_file):
with open(way_to_file, 'r', encoding = 'utf-8') as f:
text = f.read()
return text
def search(text):
    """Strip tags and newlines from ``text`` and return all 'Часовой поясUTC…' matches."""
    without_tags = re.sub('<.*?>', '', text, flags = re.DOTALL)
    flattened = without_tags.replace('\n', '')
    return re.findall('Часовой поясUTC.?[0-9]', flattened)
def write(arr, way_to_file2):
    """Append every string of ``arr`` to the UTF-8 file ``way_to_file2``.

    Returns the character count reported by the last ``write`` call, or
    0 when ``arr`` is empty (previously an empty list raised
    UnboundLocalError).
    """
    newtext = 0
    with open(way_to_file2, 'a', encoding = 'utf-8') as f:
        for elem in arr:
            newtext = f.write(elem)
    return newtext
def main():
fname1 = input()
fname2 = input()
t = open_text(fname1)
txt = search(t)
res = write(txt, fname2)
return res
a = main()
import os
import re
def no_numbers():
    """Count entries of the current directory whose name has a dot and no ASCII digit.

    Uses a raw-string pattern (the old non-raw '\\.' is an invalid escape
    sequence), compares with ``is None`` and no longer shadows ``file``.
    """
    digit = re.compile(r'[0-9]')
    count = 0
    for elem in os.listdir('.'):
        if digit.search(elem) is None and '.' in elem:
            count += 1
    return count
def no_repet():
    """Return the distinct names in the current directory with everything from the first dot removed.

    Order follows ``os.listdir``. The pattern is now a raw string; the
    old non-raw '\\..*' relied on an invalid escape sequence.
    """
    arr = []
    for elem in os.listdir('.'):
        stem = re.sub(r'\..*', '', elem)
        if stem not in arr:
            arr.append(stem)
    return arr
print('Количество файлов без цифр в названии равно', no_numbers())
print('Найдены следующие файлы и папки (без повторов):', no_repet())
# Collect words until an empty line is entered, then print those longer
# than five characters.
a = []
while True:
    word = input()
    if not word:
        break
    a.append(word)
for el in a:
    if len(el) > 5:
        print(el)
import re
def open_text(way_to_file):
with open(way_to_file, 'r', encoding = 'utf-8') as f:
text = f.read()
return text
def replace1(txt):
    """Replace lower-case forms of 'викинг' with the same form of 'бурундук'.

    The case ending captured by group 1 is appended to the new stem. The
    replacement no longer starts with ``\\b``: inside a replacement
    template that escape is a backspace character, not a word boundary,
    so every substitution used to insert '\\x08' into the text.
    """
    return re.sub(r'\bвикинг(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'бурундук\1', txt, flags = re.DOTALL)
def replace2(txt):
    """Replace capitalised forms of 'Викинг' with the same form of 'Бурундук'.

    The case ending captured by group 1 is appended to the new stem. The
    replacement no longer starts with ``\\b``, which in a replacement
    template is a backspace character rather than a word boundary.
    """
    return re.sub(r'\bВикинг(а(ми?|х)?|у|о(м|в)|е|и)?\b', r'Бурундук\1', txt, flags = re.DOTALL)
def write(txt, way_to_file2):
with open(way_to_file2, 'w', encoding = 'utf-8') as f:
newtext = f.write(txt)
return newtext
def main():
fname1 = input()
fname2 = input()
txt = open_text(fname1)
r = replace1(txt)
res = replace2(r)
result = write(res, fname2)
return result
a = main()
import os
import re
def dir_choose_kyr(dir_name):
    """Print and count the directories under ``dir_name`` named only with Cyrillic letters and spaces.

    Walks the whole tree with ``os.walk``. The character class no longer
    contains '|': inside brackets it is a literal pipe, so names with
    pipes were accepted by mistake.
    """
    cyrillic_only = re.compile('[А-ЯЁа-яё ]+')
    arr = []
    for root, dirs, files in os.walk(dir_name):
        for elem in dirs:
            if cyrillic_only.fullmatch(elem):
                arr.append(elem)
    print(arr)
    return len(arr)
def main():
dir_name = '.'
n = dir_choose_kyr(dir_name)
print('В папке найдено ', n, ' папок с полностью кириллическими названиями (допускаются пробелы между словами)')
return n
main()
# Report how many times the longest line of a file is longer than the shortest.
way = input('Введите путь к файлу без дополнительных символов: ', )
with open(way, 'r', encoding = 'utf-8') as f:
    text = f.read()
arr = text.split('\n')
# Named longest/shortest so the built-ins max() and min() are not shadowed.
lengths = [len(el) for el in arr]
longest = max(lengths)
shortest = min(lengths)
if shortest == 0:
    # An empty line (e.g. after a trailing newline) used to cause ZeroDivisionError.
    print('Самая короткая строка пустая, отношение длин не определено')
else:
    k = longest/shortest
    print('Самая короткая строка короче самой длинной в ', k, ' раз(а)')
def open_text(way_to_file):
with open(way_to_file, 'r', encoding = 'utf-8') as f:
text = f.read()
text = text.lower()
arr = text.split()
for index, elem in enumerate(arr):
arr[index] = elem.strip(',.;:!?\n ')
return arr
def finding_suffix(suffix, way_to_file):
    """Return the tokens of the file (as produced by open_text) that end with ``suffix``."""
    return [token for token in open_text(way_to_file) if token.endswith(suffix)]
def one_word_once(array):
    """Return the elements of ``array`` without repeats, keeping first-occurrence order."""
    unique = []
    for item in array:
        if item in unique:
            continue
        unique.append(item)
    return unique
def func(array):
    """Return, in order, every element of ``array`` that already appeared earlier in it."""
    seen = []
    repeats = []
    for item in array:
        target = repeats if item in seen else seen
        target.append(item)
    return repeats
def count_freq(array):
    """Return the most frequent element(s) of ``array``.

    func() is applied repeatedly, each pass dropping one occurrence of
    every distinct element, until a pass would leave nothing; the last
    non-empty list is returned.
    """
    result = array
    for _ in range(len(array)):
        repeated = func(result)
        if not repeated:
            break
        result = repeated
    return result
fname = input()
suffix = 'ness'
arr = finding_suffix(suffix, fname)
array = one_word_once(arr)
print('В тексте имеются следующие слова с суффиксом ', suffix, ':')
for elem in array:
print(elem)
max_freq = count_freq(arr)
print('Макс. частоту имеет(-ют) слово(-а):', max_freq)
import os,re
def counting_sentences(file):
sentences = re.findall('', file)
return len(sentences)
def opening_folders(folder):
path = folder
dic = {}
for file in os.listdir(folder):
with open ((os.path.join(folder, file))) as f:
text = f.read()
number = int(counting_sentences(text))
dic[file] = number
return dic
def writing_table(dic):
with open ('number_of-sentences.txt', 'w', encoding = 'utf-8') as f:
for file in dic:
f.writelines(file + '\t' + str(dic[file]) + '\n')
def author_and_topic(folder):
path = folder
for file in os.listdir(folder):
with open ((os.path.join(folder, file))) as f:
text = f.read()
reg1 = '(content="(.*)" name="author")'
reg2 = '(content="(.*)" name="topic")'
for i in range (1):
for i in re.findall(reg1, text):
author = i[1]
for i in re.findall(reg2, text):
topic = i[1]
writing_table(opening_folders(r'C:\Users/student/Desktop/news/'))
author_and_topic(r'C:\Users/student/Desktop/news/')
import os
number = 0
for roots, dirs, files in os.walk('.'):
names = []
for f in files:
name = f[::-1].split('.')[0]
if name not in names:
names.append(name)
else:
number += 1
break
print(number)
import re
m1 = 'загруж(у|(енн?(ы(й|е|х|ми?)?|о(го|му?|е|й)?|ую|а)?))(с(я|ь))?'
m2 = 'загруз(и(т|шь|м|т(ь|е)?|л(а|о|и)?)?(в(ш(и(й|ми?|х)|е(го|му?|е|й)))?)?|ят)(с(я|ь))?'
with open (r"C:\Users\Анна\Documents\GitHub\prog\PythonHW9\re.txt",'r', encoding='utf-8') as f:
mas = []
for line in f:
words = line.split()
for word in words:
word = word.strip(',.;"()-!?')
mas.append(word.lower())
arr = []
for i in mas:
a = re.search(m1,i)
b = re.search(m2,i)
if a != None and len(a.group()) == len(i):
if a.group() not in arr:
arr.append(a.group())
print(a.group())
if b != None and len(b.group()) == len(i):
if b.group() not in arr:
arr.append(b.group())
print(b.group())
i=1
int(i)
print ('Введите любое число')
a=int(input())
while i<=10:
print(i,'*',a,'=',(i*a),';')
i += 1
else:
print ('Цикл завершен')
def opening(name):
with open (name, 'r', encoding = 'utf-8') as f:
count = 0
for line in f:
if line !=' \n':
count += 1
else:
break
return count
def writing():
count = opening(name)
with open('2.txt', 'w', encoding = 'utf-8') as f:
f.write(str(count))
name = r'C:\Users\student\Desktop\1.xml'
writing()
import re
with open (name, 'r', encoding = 'utf-8') as f:
content = f.read()
arr = re.findall(r'(.*?)', content)
d = {}
for i in arr:
d[i[1]] = content.count(i[1])
with open ('3.txt', 'w', encoding = 'utf-8') as f:
for key in d:
a = str(key) + ' ' + str(d[key]) + '\n'
f.write(a)
def reading():
with open (r"C:\Users\Анна\Documents\ФиКЛ\PythonHW7\omni.txt", 'r', encoding='utf8') as text:
mas = []
for line in text:
words = line.split()
for word in words:
word = word.strip(',.;"()-!?')
mas.append(word.lower())
return(mas)
def omni_counting():
    """Print how many words start with 'omni' and how often their remainders occur.

    The word list is read once via reading(); previously the file was
    re-read for every distinct 'omni' word. The per-iteration
    ``without_omni`` list was reset on each pass and never filtered
    anything, so it is dropped. The printed numbers are unchanged.
    """
    words = reading()
    s = 0
    omni = []
    for word in words:
        if word[:4] == 'omni':
            s += 1
            if word not in omni:
                omni.append(word)
    print (s,'words with OMNI-')
    # For each distinct omni-word, count occurrences of the word without its prefix.
    p = sum(words.count(word[4:]) for word in omni)
    print(p, 'words without OMNI-')
omni_counting()
import os, re
def folder_opening(big_folder):
    """Count the files in ``big_folder`` that have an extension and a punctuation mark before it.

    The name is matched reversed: extension, dot, then any of the
    characters in the class. The range ``!-(`` is kept exactly as the
    original pattern had it, so it also covers " # $ % & '.
    Entries are tested with ``os.path.join(big_folder, item)``; the old
    ``isfile(item)`` only worked when ``big_folder`` was the current
    directory. The pattern is a raw string to avoid invalid escapes.
    """
    pattern = re.compile(r'[^.]*\..*?[,._?<>"!-()]')
    names = [item for item in os.listdir(big_folder)
             if os.path.isfile(os.path.join(big_folder, item))
             and pattern.search(item[::-1])]
    return len(names)
print('Найдено',folder_opening('.'), 'файлов, название которых содержит знаки препинания')
def all_files(big_folder):
    """Return the distinct names in ``big_folder``: files without their last extension, then folders.

    Files come first in ``os.listdir`` order, followed by directories;
    repeats are dropped. Entries are checked with
    ``os.path.join(big_folder, item)``; the old ``isfile(item)`` and
    ``isdir(item)`` only worked for the current directory.
    """
    entries = os.listdir(big_folder)
    collected = []
    for item in entries:
        if os.path.isfile(os.path.join(big_folder, item)):
            # Drop the text after the last dot; names without a dot are kept whole.
            collected.append(item.rpartition('.')[0] if '.' in item else item)
    for item in entries:
        if os.path.isdir(os.path.join(big_folder, item)):
            collected.append(item)
    all_files_new = []
    for item in collected:
        if item not in all_files_new:
            all_files_new.append(item)
    return all_files_new
print('Все файлы:', all_files('.'))
# Report how many words of the text start with an upper-case letter.
total = 0
upletters = 0
with open(r'C:\Users\Анна\Documents\GitHub\prog\PythonHW5\text.txt','r',encoding='utf8') as f:
    text = f.read()
words = text.split()
for item in words:
    total += 1
    # Only the first character decides; the old loop counted every capital letter in the word.
    if item[0].isupper():
        upletters += 1
print("Количество слов в тексте: ",total)
print('Количество слов с заглавной буквы',upletters)
# Scale to a real percentage and avoid dividing by zero on an empty file.
percent = upletters / total * 100 if total else 0
print("Процент слов в тексте, начинающихся с заглавной буквы: ", percent,'%')
arr = []
num = 1
while num <=7:
chislo = int(input('Vvedite chisclo'))
if chislo > 0:
arr.append('X'*chislo)
else:
arr.append('')
num += 1
for i in arr:
print(i)
def opening(file):
    """Read a UTF-8 file and return its text split at every '.', '!' and '?'."""
    with open(file, 'r', encoding = 'utf-8') as source:
        text = source.read()
    return [piece
            for by_dot in text.split('.')
            for by_bang in by_dot.split('!')
            for piece in by_bang.split('?')]
for sentence in opening(r"C:\Users\Анна\Documents\GitHub\prog\PythonHW12\text.txt"):
words = sentence.split()
new_words = [word.strip('.,!?/-;:''""«»—()') for word in words if len(words) > 10]
print(new_words)
lenght = 0
for word in new_words:
lenght += len(word)
if new_words:
template = 'Это предложение со словами длины {:.1f}'
print (template.format(lenght/len(new_words)))
print ('Введите три числа A,B и C')
a = int(input ())
b = int(input ())
c = int(input ())
print ("A =",a)
print ("B =",b)
print ("C =",c)
print ('A+B =',(a+b))
print ('A*C + B =',(a*c+b))
if (a+b) == c:
if (a*c)+b==0:
print ('сумма чисел A и B равна С и число С является решением квадратного уравнения ax+b=c')
else:
print ('сумма чисел A и B равна С, но число С не является решением квадратного уравнения ax+b=c')
elif (a*c)+b == 0:
print ('сумма чисел A и B не равна С, но число С является решением квадратного уравнения ax+b=c')
else:
print ('сумма чисел A и B не равна С и число С не является решением квадратного уравнения ax+b=0')
word = input('Vvedite slovo')
offset = 1
int(offset)
for offset in range (len(word)):
print (word [offset::])
with open (r"C:\Users\Анна\Documents\GitHub\prog\PythonHW8\words.csv", 'r', encoding = 'utf-8') as text:
mas = []
for line in text:
words = line.split(',')
for word in words:
mas.append(word)
words = {}
for i in mas:
word = i.split(';')
words[word[0].strip()] = word[1].strip()
for key in words:
print(key, '...')
p = 3
for i in range (3):
if input() != words[key]:
p -= 1
print('Осталось', p, 'попыток')
if p == 0:
print('Вы не угадали слово')
else:
print ('Ура! Вы угадали слово!')
break
import random
def nouns():
    """Return a random whitespace-separated word from nouns.txt.

    The file is read inside ``with``; the old ``f.close()`` sat after
    ``return`` and never ran.
    """
    with open (r'D:\Desktop\Аня\sentence_generator\nouns.txt','r', encoding = 'UTF-8') as f:
        arr = f.read().split()
    return random.choice(arr)
def adjectives():
    """Return a random word from adjectives.txt followed by a space and nouns().

    The file is read inside ``with`` so the handle is released; it was
    never closed before.
    """
    with open (r'D:\Desktop\Аня\sentence_generator\adjectives.txt','r', encoding = 'UTF-8') as f:
        arr = f.read().split()
    return random.choice(arr) + ' ' + nouns ()
def verbs():
    """Return a random whitespace-separated word from verbs.txt.

    The file is read inside ``with``; the old ``f.close()`` sat after
    ``return`` and never ran.
    """
    with open (r'D:\Desktop\Аня\sentence_generator\verbs.txt','r', encoding = 'UTF-8') as f:
        arr = f.read().split()
    return random.choice(arr)
def adverbs():
    """Return a random whitespace-separated word from adverbs.txt.

    The file is read inside ``with``; the old ``f.close()`` sat after
    ``return`` and never ran.
    """
    with open (r'D:\Desktop\Аня\sentence_generator\adverbs.txt','r', encoding = 'UTF-8') as f:
        arr = f.read().split()
    return random.choice(arr)
def assertion():
return(adjectives()) + ' ' + (verbs()) + 't' + ' ' + 'une ' + (adjectives()) + ' ' + (adverbs())
def sentence():
return 'La ' + (assertion()) + '.'
def negation():
return 'La ' + (adjectives()) + ' ' + 'ne' + ' ' + (verbs()) + 't' + ' ' + 'pas ' + 'une ' + (adjectives()) + ' '\
+ (adverbs()) + '.'
def question():
return 'La ' + (adjectives()) + ' ' + (verbs()) + 't' + '-elle ' + 'une ' + (adjectives()) + ' ' + (adverbs()) + '?'
def conditions ():
return 'Si ' + (assertion()) + ', ' + (assertion()) + '.'
def imperative():
a = str(verbs())
return (a.capitalize() + 's' + ' ' + 'une ' + (adjectives()) + ' ' + (adverbs()) + '!')
# Generate one sentence of each kind and print them all once, in random order.
mas = [(sentence()),(negation()),(question()),(conditions ()),(imperative ())]
# The old code made 25 random draws and printed each new sentence it hit,
# which did not guarantee that every sentence was printed. Identical
# sentences are still printed only once.
mass = list(dict.fromkeys(mas))
random.shuffle(mass)
for randitem in mass:
    print(randitem)
def content(name):
with open (name, 'r', encoding = 'utf-8') as f:
content = f.read()
return content
name = r"C:\Users\Анна\Documents\GitHub\prog\PythonHW10\Squirrels.html"
import re
reg = u'( | Отряд: | \n(.*) | )'
link = re.search(reg, content(name))
link = ((re.search(('title="(.*)"'),link.group())).group()).strip('title="')
print("Отряд", link)
with open (r'C:\Users\Анна\Documents\GitHub\prog\PythonHW11\lingva.html', 'r', encoding = 'utf-8') as f:
content = f.read()
import re
article = re.sub(u'язык((а(х|ми?)?|у|о(м|в)|и|е)?[\s.,— ''""<>?!»():-;])', 'шашлык\\1', content)
article2 = re.sub(u'Язык((а(х|ми?)?|у|о(м|в)|и|е)?[\s.,— ''""<>?»!():-;])', 'Шашлык\\1', article)
with open ('new.txt', 'w', encoding='utf-8') as f:
f.write(article2)
def count_tf(word, text):
    """Return the share of ``text`` occupied by ``word``: occurrences divided by length.

    An empty ``text`` yields 0 instead of raising ZeroDivisionError.
    """
    if not text:
        return 0
    return text.count(word) / len(text)
def count_df(word, texts):
    """Return the number of items in ``texts`` that contain ``word``."""
    return sum(1 for text in texts if word in text)
def count_idf(word, texts):
n = len(texts) / (1 + count_df(word, texts))
return n
from math import log
def count_tfidf(word, text, texts):
    """Return the TF-IDF score of ``word`` in ``text`` relative to ``texts``.

    Computed as ``tf * log10(idf)``, with tf from count_tf() and the
    document ratio from count_idf(). The previous version took the
    logarithm of tf as well, which raises a math domain error when the
    word is absent (tf == 0) and does not give a TF-IDF value.
    NOTE(review): higher scores now mean more characteristic words, so
    callers that rank by this value should sort in descending order.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    return tf * log(idf, 10)
import re
punct = '[.,!«»?&@"$\[\]\(\):;%
def preprocessing(text):
text_wo_punct = re.sub(punct, '', text.lower())
words = text_wo_punct.strip().split()
return words
import os
texts_dic = {}
for root, dirs, files in os.walk('wikipedia'):
for f in files[:50]:
with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
text = preprocessing(t.read())
texts_dic[f.split('.')[0]] = text
texts = list(texts_dic.values())
for text in texts_dic:
for word in texts_dic[text]:
scores = {}
scores[word] = count_tf(word, texts_dic[text])
if scores[word] >= 55:
texts_dic[text].pop(word)
for text in texts_dic:
print("Top words in document {}".format(text))
scores = {}
for word in texts_dic[text]:
scores[word] = count_tfidf(word, texts_dic[text], texts)
sorted_words = sorted(scores.items(), key=lambda x: x[1])
for word, score in sorted_words[:5]:
print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
import os
import re
import codecs
f2 = open('table.csv', 'w', encoding = 'utf-8')
f2.write('Название текста' + ',' + 'Автор' + ',' 'Дата создания текста')
for file in os.listdir('.'):
if file.endswith('xhtml'):
with codecs.open(file, 'r', 'Windows-1251') as f:
a = f.read()
f.close()
r1 = re.search('(.*)', a)
r2 = re.search('', a)
r3 = re.search('', a)
if r1 and r2 and r3:
f2.write(r1.group(1) + ',' + r2.group(1) + ',' + r3.group(1))
f2.close()
import os
import re
import codecs
f1 = open('file_words.txt', 'w', encoding = 'utf-8')
for file in os.listdir('.'):
if file.endswith('xhtml'):
f = codecs.open(file, 'r', 'Windows-1251')
a = f.read()
f.close()
r = re.search('(.*)', a)
if r:
f1.write(r.group(1) + '\t' + str(len(re.findall('', a))) + '\n')
print('1')
f1.close()
import os
def func1():
    """Return a dict counting, per first character, the directory names found by os.walk('.')."""
    freqdict = {}
    for root, dirs, files in os.walk('.'):
        for d in dirs:
            initial = d[0]
            freqdict[initial] = freqdict.get(initial, 0) + 1
    return freqdict
def func2(freqdict):
    """Print the key of ``freqdict`` with the highest count.

    On a tie the key that comes first in the dict wins, as before. An
    empty dict, or one with no positive count, now prints a short notice
    instead of raising UnboundLocalError.
    """
    if not freqdict or max(freqdict.values()) <= 0:
        print ('папок не найдено')
        return
    a = max(freqdict, key=freqdict.get)
    print ('название большинства папок начинается на ' + a)
    return
func2(func1())
import random
def length(string):
    """Return a string of dots, one for each character of ``string``."""
    return '.' * len(string)
def create_arr_and_dic():
    """Load 'Слова и подсказки.csv' and return (first fields, dict first -> second field).

    Lines are split on ';'. A byte-order mark is stripped from the first
    field and the newline from the second. The file is now closed via
    ``with``, and lines without a ';' (for example a blank last line)
    are skipped instead of raising IndexError.
    """
    arr = []
    dic = {}
    with open("Слова и подсказки.csv", encoding = "utf-8") as f:
        for line in f:
            words = line.split(';')
            if len(words) < 2:
                continue
            x = words[0].strip('\ufeff')
            arr.append(x)
            dic[x] = words[1].strip('\n')
    return arr, dic
array, dictionary = create_arr_and_dic()
y = random.choice(array)
print('Вот ваша подсказка:', y, length(y))
z = input('Загаданное слово: ')
if z == dictionary[y]:
print('Правильно.')
else:
print('Увы, нет:(')
import re
def func1():
    """Map every sentence of 'тестовый файл.txt' to a dict of its words and their lengths.

    In each line, sentence-ending punctuation ('.', '..', '...', '?',
    '!') with an optional newline and space is normalised to '.', and
    the line is split there. Each non-empty sentence is split on single
    spaces. The file is now closed via ``with`` and the pattern is a raw
    string (the old literal used invalid escape sequences).
    """
    d = {}
    with open('тестовый файл.txt', 'r', encoding = 'utf-8') as f:
        for line in f:
            line = re.sub(r'(\.\.?\.?|\?|!)(\n)? ?', '.', line)
            for sentence in line.split('.'):
                if len(sentence) >= 1:
                    d[sentence] = {word.strip(): len(word.strip())
                                   for word in sentence.split(' ')}
    return d
print(func1())
word=input('Введите слово ')
anotherword=''
sameword=word
print(word)
for i in range(len(word)-1):
anotherword=word[len(word)-i-1]
for k in range(len(sameword)-1):
anotherword+=sameword[k]
print(anotherword)
sameword=anotherword
f=open('text.txt', encoding="utf-8")
a=f.readlines()
z=0
x=0
m=0
n=0
for line in a:
words=line.split()
for z in range(len(words)):
if words[z].endswith('.') or words[z].endswith(','):
x+=1
m+=x
x=0
n+=len(words)
print ((n-m)/n*100, '% слов в этом тексте не оканчиваются точкой или запятой')
import random
def actor3():
    """Return a random word from actor3.txt, capitalised.

    The file is read inside ``with`` so it is closed; it was left open
    before.
    """
    with open('actor3.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova).capitalize()
def adj2():
    """Return a random word from adj2.txt.

    The file is read inside ``with`` so it is closed; it was left open
    before.
    """
    with open('adj2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova)
def line1(noun, adjective):
    """Return the first haiku line: ``noun`` and ``adjective`` separated by a space."""
    return ' '.join((noun, adjective))
def adverb2():
    """Return a random word from adverb2.txt, capitalised.

    The file is read inside ``with`` so it is closed; it was left open
    before.
    """
    with open('adverb2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova).capitalize()
def verb2():
    """Return a random word from verb2.txt.

    The file is read inside ``with`` so it is closed; it was left open
    before.
    """
    with open('verb2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova)
def place2():
    """Return a random word from place2.txt.

    The file is read inside ``with`` so it is closed; it was left open
    before.
    """
    with open('place2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova)
def line2(adverb, verb, place):
    """Return the second haiku line: '<adverb> <verb> di <place>.'."""
    return ''.join([adverb, ' ', verb, ' di ', place, '.'])
def actor2():
    """Return a random word from actor2.txt, capitalised.

    The file is read inside ``with`` so it is closed; it was left open
    before.
    """
    with open('actor2.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova).capitalize()
def verb3():
    """Return a random word from verb3.txt.

    The file is read inside ``with`` so it is closed; it was left open
    before.
    """
    with open('verb3.txt', encoding="utf-8") as f:
        slova = f.read().split()
    return random.choice(slova)
def line3(noun, verb):
    """Return the third haiku line: '<noun> <verb>.'."""
    return ''.join([noun, ' ', verb, '.'])
def randomhaiku():
    """Return a three-line haiku assembled from line1(), line2() and line3()."""
    first = line1(actor3(), adj2())
    second = line2(adverb2(), verb2(), place2())
    third = line3(actor2(), verb3())
    return '\n'.join([first, second, third])
print (randomhaiku())
arr=[]
s=input('Ввведите латинское слово ')
if len(s)!=0:
arr.append(s)
while len(s)!=0:
s=input('Ввведите латинское слово ')
if s.endswith ('re') or s.endswith ('i')or s.endswith ('isse') \
or s.endswith ('us esse') or s.endswith ('a esse') or s.endswith ('um esse') \
or s.endswith ('um iri'):
arr.append(s)
for i in range (len(arr)):
print (arr[i])
N=int(input('Введите число '))
x=1
while x!=N:
word=input('Введите слово ')
x+=1
if word=='программирование':
break
print('Работа программы завершена')
import re
def func1():
arr = []
i = 0
f = open("Текст с глаголом выпить.txt", encoding = "utf-8")
a = f.readlines()
for line in a:
words = line.split()
for i in range(len(words)):
words[i] = words[i].lower()
arr.append(words[i].strip('.,!?/\|()";:'))
f.close()
return arr
arr1 = []
i = 0
for i in range(len(func1())):
if re.search('вып((ей(те)?)|(ь(е((шь)|м|те?)|ют?))|(и((л(а|о|и)?)|(т(ь?|(ы(й|ми?|х|е))\
|(ая?)|(о(е|(го)|й|му?)?)|(ую))))|в(ш((ая)|(ую)|и(й|ми?|х)|е(е|ю|му?)))?))', func1()[i]):
if func1()[i] not in arr1:
arr1.append(func1()[i])
print(func1()[i])
# Read three integers and report whether a + b == c and whether a / b == c.
a=input('Введите число а')
b=input('Введите число b')
c=input('Введите число c')
a=int(a)
b=int(b)
c=int(c)
if a+b==c:
    print ('a и b в сумме дают c')
else:
    print ('a и b в сумме НЕ дают c')
# Division by zero is undefined, so b == 0 is reported as "not equal"
# instead of crashing with ZeroDivisionError.
if b != 0 and a/b==c:
    print ('a разделить на b равно c')
else:
    print ('a разделить на b НЕ равно c')
def func1(text_file):
    """Count whitespace-separated tokens ending in 'ed' and, among them, in 'ied'.

    Returns ``[ed_count, ied_count]``. The file is now read inside
    ``with`` so it is closed; it was left open before.
    """
    ed = 0
    y = 0
    with open(text_file, encoding = "utf-8") as f:
        for line in f:
            for word in line.split():
                if word.endswith('ed'):
                    ed += 1
                    if word.endswith('ied'):
                        y += 1
    return [ed, y]
a = input('Введите название файла, который хотите открыть: ')
print('Количество форм на -ed в тексте: ', func1(a)[0], \
'\nИз них образованы от глаголов на -y: ', func1(a)[1])
import re
def func1():
f = open("Ферма, Пьер — Википедия.html", encoding = "utf-8")
a = f.readlines()
i = 0
for i in range(len(a)):
r1 = re.search("Научная сфера: | ", a[i])
r2 = re.search("", a[i+1])
if r1 and r2:
r = re.search("( \
.* )", \
a[i+2])
break
f.close()
return r
def func2():
if func1():
title = func1().group(2)
else:
print ('что-то пошло не так')
return title
f = open("text_wiki.txt", 'w', encoding = "utf-8")
f.write(func2())
f.close()
f = open("text_wiki.txt", encoding = "utf-8")
a = f.readlines()
for line in a:
print(line)
import os
alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ. '
def func1():
number = 0
arr1 = []
for i in os.listdir('.'):
if os.path.isfile(i):
j = 0
check1 = True
check2 = 0
for j in range(len(i)):
if i[j] not in alphabet:
check1 = False
if i[j] == '.':
check2 += 1
if check1 == True and check2 <= 1:
number += 1
arr1.append(i)
print('Найдено файлов, название которых состоит \
только из латинских символов: ' + str(number))
return arr1
def func2(arr):
    """Print each distinct name from ``arr`` with everything from its first dot removed.

    Names without a dot are printed whole. The old slice
    ``i[0:i.find('.')]`` used -1 as the end index in that case and cut
    off the last character.
    """
    arr2 = []
    for i in arr:
        stem = i.split('.', 1)[0]
        if stem not in arr2:
            arr2.append(stem)
    for k in arr2:
        print (k)
    return
func2(func1())
import re
def func1():
f1 = open("Философия -- Википедия.txt", 'r', encoding = "utf-8")
change1 = re.sub('Филос(о́|о)фи(я(х|ми?)?|и|е?й|ю)', 'Астрол\\1ги\\2', f1.read())
change2 = re.sub('философи(я(х|ми?)?|и|е?й|ю)', 'астрологи\\1', change1)
f1.close()
return change2
def func2():
f2 = open("Астрология.txt", 'w', encoding = "utf-8")
f2.write(func1())
f2.close()
return True
func2()
import re
def func1():
arr = []
i = 0
f = open("Космическая программа Китая.txt", encoding = "utf-8")
a = f.readlines()
for line in a:
arr.append(line)
f.close()
return arr
i = 0
arr1 = []
for i in range(len(func1())):
res = re.findall('«[А-Яа-я ]*-[1-9]»', func1()[i])
j = 0
for j in range(len(res)):
if res[j] not in arr1:
arr1.append(res[j])
print (res[j])
word=input('Введите русское существительное первого склонения')
if word.endswith('а') or word.endswith ('я'):
print ('Именительный падеж, единственное число')
elif word.endswith ('ами') or word.endswith ('ями'):
print ('Творительный падеж, множественное число')
elif word.endswith('ы') or word.endswith ('и'):
print ('Родительный падеж, единственное число или именительный или винительный падеж, множественное число')
elif word.endswith('е'):
print ('Дательный или предложный падеж, единственное число')
elif word.endswith('ой') or word.endswith ('ою') or word.endswith('ёй') or word.endswith ('ёю')or word.endswith ('ею') or word.endswith ('ей'):
print ('Творительный падеж, единственное число')
elif word.endswith('у') or word.endswith('ю'):
print ('Винительный падеж, единственное число')
elif word.endswith ('ам') or word.endswith ('ям'):
print ('Дательный падеж, множественное число')
elif word.endswith ('ах') or word.endswith('ях'):
print ('Предложный падеж, множественное число')
else:
print ('Родительный или винительный падеж, множественное число')
import re
def func3b(string):
r = re.match('([А-Яа-я][a-я]*)= 10:
print (word, freqdict(func1())[word])
a=9
a=int(a)
s=input('Введите число')
s=int(s)
if a==s:
print('Позравляю, вы угадали')
else:
if a>s:
print('Загаданное число больше')
if a.+', line)
w_sum += len(arr1)
arr2 = re.findall('ana', line)
ana_sum += len(arr2)
print (str(ana_sum/w_sum))
return
func1()
def func1():
arr = []
i = 0
f = open("1.txt", encoding = "utf-8")
a = f.readlines()
for line in a:
words = line.split()
for i in range(len(words)):
words[i] = words[i].lower()
words[i] = words[i].strip('.,!?/\|()";:')
arr.append(words[i])
f.close()
return arr
def func2(x,arr):
    """Return the words of ``arr`` that contain exactly ``x`` characters from 'аяоёуюэеыи'."""
    glasnye = 'аяоёуюэеыи'
    slova = []
    for word in arr:
        slogi = sum(1 for letter in word if letter in glasnye)
        if slogi == x:
            slova.append(word)
    return slova
def func3(bukva,arr):
    """Return the words of ``arr`` whose first character equals ``bukva``.

    Empty words are skipped; ``word[0]`` used to raise IndexError on
    them.
    """
    return [word for word in arr if word and word[0] == bukva]
y = input('Введите букву русского алфавита ')
print(func3(y,func1()))
def func1():
arr = []
i = 0
f = open("1.txt", 'r', encoding = "utf-8")
a = f.readlines()
for line in a:
words = line.split()
for i in range(len(words)):
words[i] = words[i].lower()
arr.append(words[i].strip(',.()«»!'))
f.close()
arr.sort()
return arr
def freqdict(arr):
    """Return a dict mapping each element of ``arr`` to its number of occurrences."""
    word_count = {}
    for word in arr:
        word_count[word] = word_count.get(word, 0) + 1
    return word_count
f1 = open("2.tsv", 'w', encoding = "utf-8")
for j in sorted(freqdict((func1()))):
f1.write(j)
f1.write('\t')
f1.write(str(freqdict(func1())[j]))
f1.write('\n')
f1.close()
alphabet = 'abcdefghijklmnopqrstuvwxyzабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
alphabet = list(alphabet)
def freqdict1(arr):
letter_count = {}
for letter in alphabet:
letter_count[letter] = 0
for word in arr:
if word.startswith(letter):
letter_count[letter] += 1
return letter_count
f2 = open("3.tsv", 'w', encoding = "utf-8")
for k in sorted(freqdict1(func1())):
f2.write(k)
f2.write('\t')
f2.write(str(freqdict1(func1())[k]))
f2.write('\n')
f2.close()
a=9
a=int(a)
s=input('Введите число ')
if len(s)==0:
print ('Game over')
s=int(s)
while a!=s:
if a>s:
print('Загаданное число больше')
if a.+", line)
if s1:
if s1.group(1) not in freqdict:
freqdict[s1.group(1)] = 1
else:
freqdict[s1.group(1)] += 1
return freqdict
def func2():
f3 = open("Ключи.txt", 'w', encoding = "utf-8")
for i in func1():
f3.write(i)
f3.write('\n')
f3.close()
return True
func2()
def func3():
f4 = open("Прилагательные.txt", 'w', encoding = "utf-8")
for i in func1():
s2 = re.search("l.f...", i)
if s2:
f4.write(i)
f4.write(' ')
f4.write(str(func1()[i]))
f4.write('\n')
f4.close()
return True
func3()
def func4():
f5 = open("Внутри тега body.txt", 'r', encoding = "utf-8")
change1 = re.sub("(.+)", "\\1 \\2 \\3", f5.read())
change2 = re.sub("<.*>", ' ', change1)
f5.close()
return change2
import os
import re
def preprocessing():
all_meta = []
w = open('results.txt', 'w', encoding = 'utf-8')
for el in os.listdir('news'):
with open(os.path.join('news',el), 'r', encoding = 'Windows-1251') as f:
article = f.read()
sentences = re.findall(r'', article)
template = '{} {}\n'
w.write(template.format(el, len(sentences)))
author = re.findall(r'', article)
authorstr = author[0]
authorstr = re.sub('', '', authorstr)
topic = re.findall(r'', article)
topicstr = topic[0]
topicstr = re.sub('', '', topicstr)
meta = el+','+authorstr+','+topicstr+'\n'
all_meta.append(meta)
words = []
wordsraw = re.findall('.+', article)
for el in wordsraw:
wordsrawstr = el
wordsrawstr = re.sub('', '', wordsrawstr)
wordsrawstr = re.sub('', '', wordsrawstr)
wordsrawstr = re.sub('`', '', wordsrawstr)
wordsrawstr = wordsrawstr.lower()
words.append(wordsrawstr)
bigrams = []
for ind in range(1, len(words) - 1):
bigrams.append(' '.join([words[ind - 1], words[ind]]))
w.close()
return bigrams, all_meta
def data(all_meta):
    """Write the header line and every string of ``all_meta`` to metadata.csv (UTF-8).

    The file is opened in a ``with`` block so it is closed even if a
    write fails.
    """
    with open('metadata.csv', 'w', encoding = 'utf-8') as w:
        w.write('Название файла,Автор,Тематика текста\n')
        w.writelines(all_meta)
def bigram_processing(bigrams):
    """Write to bigrams_res.txt every bigram that starts with one of the listed prepositions.

    A bigram is kept when it matches '<preposition> <text containing
    е/и/ах/ях>' from its start. The file is opened in a ``with`` block
    so it is closed even on error, and the match is tested with
    ``is not None``.
    """
    pattern = re.compile(r'(в|на|о|об|обо|при|по) .+(е|и|ах|ях)')
    with open('bigrams_res.txt', 'w', encoding = 'utf-8') as w:
        for el in bigrams:
            if pattern.match(el) is not None:
                w.write(el + '\n')
bigrams, all_meta = preprocessing()
bigram_processing(bigrams)
data(all_meta)
w=input('Введите слово: ')
while w == '':
w=input('Попробуйте еще раз: ')
border = 1
for i in range (len(w) // 2):
print (w[border:len(w) - border])
border += 1
import re
def print_forms():
    """Print each distinct form of the verb 'сидеть' found in rudin.txt.

    Words are lower-cased and stripped of surrounding punctuation before
    matching. The old code called ``word.lower()`` and ``word.strip()``
    without keeping the results, so capitalised forms were missed and
    punctuation stayed attached to the printed words.
    """
    with open('rudin.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    pattern = re.compile('си(жу|д(е(ть|в((ши)?й?)?|л(а|о|и)?)|и(те?|м|шь)?|я(т|щий)?))')
    sit_arr = []
    for word in text.split():
        word = word.lower().strip(',...!?-–— :,')
        if pattern.match(word) is not None and word not in sit_arr:
            sit_arr.append(word)
    for el in sit_arr:
        print (el)
print_forms()
import random
def create_dict():
    """Load the riddle database from ``db.txt``.

    Every non-empty line is a clue phrase whose last word is the answer.

    Returns:
        (clues, keys): ``clues`` maps each answer to the list of clue words
        that precede it, ``keys`` lists the answers in file order.
    """
    with open('db.txt', 'r', encoding='utf-8') as f:
        db = f.read()
    clues = dict()
    keys = []
    for phrase in db.split('\n'):
        phrase_split = phrase.split()
        if not phrase_split:
            # A blank line (e.g. after the final newline) used to raise IndexError.
            continue
        *clue, answer = phrase_split
        clues[answer] = clue
        keys.append(answer)
    return clues, keys
def show(clues, keys, shown):
    """Print the clue for a random answer and compare the user's guess with it.

    clues: mapping answer -> list of clue words.
    keys: list of all answers.
    shown: answers that were already guessed; they are avoided while any
        other answer is left.

    Returns:
        (check, key): whether the lower-cased guess equals the answer, and
        the answer itself.
    """
    # Draw directly from the answers not shown yet. Re-drawing until an unseen
    # key came up never terminated once every key was in ``shown``.
    remaining = [k for k in keys if k not in shown]
    key = random.choice(remaining or keys)
    for el in clues[key]:
        print(el, end=' ')
    guess = input()
    return guess.lower() == key, key
def result(check):
    """Print a random congratulation if *check* equals True, else a random consolation."""
    praise = ['Поздравляю!', 'horoshego dnya!', 'Угадали!', 'Верно!', 'Хорошо сработано!']
    comfort = ['Попробуйте еще раз!', 'Не отчаивайтесь, продолжайте!', 'Почти в точку... у вас есть еще попытка!', 'nichego, zavtra otgadaete!', 'escho chut-chut...']
    pool = praise if check == True else comfort
    print(random.choice(pool))
def run():
    """Play ten rounds of the guessing game, remembering the solved answers."""
    # The database does not change between rounds, so it is read only once.
    clues, keys = create_dict()
    shown = []
    for _ in range(10):
        check, key = show(clues, keys, shown)
        result(check)
        if check == True:
            shown.append(key)
run()
print('Всего доброго!')
import re
def change():
    """Copy ``aves.txt`` to ``fish.txt`` turning "птиц…" word forms into "рыб…"."""
    with open('aves.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    # "птице?" forms: keep the real last letter instead of writing a literal '.'.
    text = re.sub(r'\bптице(.)\b', r'рыбо\1', text)
    text = re.sub(r'\bПтице(.)\b', r'Рыбо\1', text)
    # In a replacement template '\b' means a backspace character, not a word
    # boundary, so it must not be part of the replacement text.
    text = re.sub(r'\bптиц', 'рыб', text)
    text = re.sub(r'\bПтиц', 'Рыб', text)
    with open('fish.txt', 'w', encoding='utf-8') as f:
        f.write(text)
    print('Текст записан в файл fish.txt')
change()
import re
# Read chuvash.html and return the first piece produced by the second split.
# NOTE(review): this block cannot run as written — `el` is never defined and
# both split patterns are empty strings, presumably HTML markup that was lost
# when the file was extracted. TODO restore from the original script.
def search():
with open('chuvash.html', 'r', encoding = 'utf-8') as f:
source = f.read()
search_arr = source.split('', el)
codearr = re.split('', el[1])
result = codearr[0]
return result
def record(result):
    """Write *result* to ``blank.txt`` and report that on stdout."""
    # The ``with`` block already closes the file; no explicit close() needed.
    with open('blank.txt', 'w', encoding='utf-8') as f:
        f.write(result)
    print('Трехбуквенный код языка записан в файл blank.txt')
result = search()
record(result)
# Read a number (re-asking while it is below 2) and print every power of two
# that does not exceed it.
print('Введите число')
n = float(input())
while n < 2:
    print ('Отсутствует степени 2, не превышающие', n, 'Пожалуйста, введите число не меньше двух.')
    n = float(input())
print ('Степени числа 2, не превышающие', n, ':', end=' ')
d = 2
while d <= n:
    print(d, end=' ')
    d = d * 2
def opentext(title):
    """Return the words of the file *title*, lower-cased and without edge punctuation."""
    with open(title, 'r', encoding='utf-8') as f:
        text = f.read()
    # lower()/strip() return new strings; the old loop threw the results away
    # and returned the raw tokens.
    return [elem.lower().strip('!-./?"", ') for elem in text.split()]
def firstletter(letter, arr):
    """Return the items of *arr* that start with the prefix *letter*.

    The prefix may have any length (the comparison used to be hard-wired to
    the first two characters).
    """
    return [elem for elem in arr if elem.startswith(letter)]
def questions():
    """Ask for a file path and a minimum length; return (minlen, words starting with 'un')."""
    file_name = input('Введите путь к файлу: ')
    minlen = int(input('Введите минимальную длину слова: '))
    return minlen, firstletter('un', opentext(file_name))
def count(minlen, un_words):
    """Print how many words were found and the share of them longer than *minlen*.

    minlen: length threshold (a word counts when ``len(word) > minlen``).
    un_words: list of words; an empty list yields a share of 0 instead of a
        ZeroDivisionError.
    """
    wordslen = [elem for elem in un_words if len(elem) > minlen]
    print('Количество слов, начинающихся с un:', len(un_words))
    percent = len(wordslen) / len(un_words) * 100 if un_words else 0
    print('Процент слов длинее', minlen, ':', percent)
minlen, un_words = questions()
count(minlen, un_words)
import os
import shutil
import re
def countfolders():
    """Print the sub-folders of the current directory whose names mix Latin and Cyrillic letters, then their count."""
    folders = [el for el in os.listdir() if os.path.isdir(el)]
    # The patterns have to be applied with re.search; testing the pattern text
    # with ``in`` only looked for that literal substring in the name.
    result = [
        folder for folder in folders
        if re.search(r'[a-z]|[A-Z]', folder) and re.search(r'[а-яё]|[А-ЯЁ]', folder)
    ]
    for folder in result:
        print(folder)
    print('Всего папок, удовлетворяющих условию:', len(result))
countfolders()
import os
def mostfiles():
    """Print the directory under '.' that holds the most files, with that count."""
    best_count = 0
    best_root = ''
    for root, _dirs, files in os.walk('.'):
        # Strict comparison: the first directory with the top count wins.
        if len(files) > best_count:
            best_count = len(files)
            best_root = root
    print('Количество файлов в папке по адресу', best_root, ':', best_count)
mostfiles()
# Count the lines of exomars.txt and how many of them have more than five
# words (a stand-alone dash token is not a word), then print the percentage.
countline = 0
countall = 0
with open('exomars.txt', 'r', encoding='utf-8') as text:
    for line in text:
        countall += 1
        arr = line.split()
        # Dashes are counted per line; the old running total was never reset
        # and was subtracted from every later line as well.
        symb = arr.count('—')
        countwords = len(arr) - symb
        if countwords > 5:
            countline += 1
# An empty file has no percentage to compute.
percent = round(countline * 100 / countall) if countall else 0
print('Всего строк:', countall,'Строк с числом слов больше 5:', countline, 'Процент:', percent, '%')
# Read eight words and print them glued together in consecutive pairs.
print('Пожалуйста, введите 8 слов')
arr = [input() for _ in range(8)]
arr1 = [arr[i] + arr[i + 1] for i in range(0, 7, 2)]
for el in arr1:
    print (el)
import random
def _load_words(path, transform):
    """Return the lines of *path*, each passed through *transform* and stripped."""
    # ``with`` closes the file even if reading fails part-way.
    with open(path, 'r', encoding='utf-8') as src:
        return [transform(line).strip() for line in src]


def adj():
    """Return a random capitalised line from ``esenin_adj_pl.txt``."""
    return random.choice(_load_words('esenin_adj_pl.txt', str.capitalize))


def noun():
    """Return a random lower-cased line from ``spi_noun_pl.txt``."""
    return random.choice(_load_words('spi_noun_pl.txt', str.lower))


def verb():
    """Return a random capitalised line from ``majakovsky_verbs.txt``."""
    return random.choice(_load_words('majakovsky_verbs.txt', str.capitalize))


def adv():
    """Return a random lower-cased line from ``pushkin_adverbs.txt``."""
    return random.choice(_load_words('pushkin_adverbs.txt', str.lower))


def prop():
    """Return a random lower-cased line from ``properties.txt``."""
    return random.choice(_load_words('properties.txt', str.lower))
def _vowel_count(text):
    """Count the Russian vowel letters in *text* (one vowel = one syllable)."""
    return sum(1 for letter in text if letter in 'АЕЁИОУЫЭЮЯаеёиоуыэюя')


def line1():
    """Return an "adjective noun" line with exactly five vowels."""
    while True:
        first = adj() + ' ' + noun()
        if _vowel_count(first) == 5:
            return first


def line2():
    """Return a "verb adverb adverb" line plus punctuation with exactly seven vowels."""
    while True:
        second = verb() + ' ' + adv() + ' ' + adv() + random.choice(['!','?','.','...'])
        if _vowel_count(second) == 7:
            return second


def line4():
    """Return a "verb adverb" line plus punctuation with exactly seven vowels."""
    while True:
        fourth = verb() + ' ' + adv() + random.choice(['!','?','.','...'])
        if _vowel_count(fourth) == 7:
            return fourth


def line5():
    """Return a "pronoun property adverb" line plus punctuation with exactly seven vowels."""
    person = ['Я','Ты']
    while True:
        fifth = random.choice(person) + ' ' + prop() + ' ' + adv() + random.choice(['!','?','.','...'])
        if _vowel_count(fifth) == 7:
            return fifth
print(line1())
print(line2())
print(line1())
print(line4())
print(line5())
# Read three numbers and report whether a / b and a ** b equal c.
print('Введите три числа')
a,b,c=float(input()), float(input()), float(input())
try:
    div=a/b
except ZeroDivisionError:
    # Division by zero has no value, so it can never equal c.
    div=None
try:
    deg=a**b
except (ZeroDivisionError, OverflowError):
    # 0 to a negative power, or a result too large for a float.
    deg=None
if div==c:
    print ('Результат деления А на B равен С')
else:
    print ('Результат деления А на B НЕ равен С')
if deg==c:
    print ('А в степени B равно С')
else:
    print ('А в степени B НЕ равно С')
import re
def preproc():
    """Split ``text.txt`` into lower-cased sentences with inner punctuation removed."""
    with open('text.txt', 'r', encoding = 'utf-8') as src:
        raw = src.read()
    cleaned = []
    for chunk in re.split(r'[\.\?\!]', raw):
        cleaned.append(re.sub(r'[,—“\':”\(\)]', '', chunk.lower()))
    return cleaned
def count(sent):
    """Map every word occurring more than once in the list *sent* to its count.

    When nothing repeats, a one-entry placeholder dictionary is returned.
    """
    repeated = {}
    for word in sent:
        occurrences = sent.count(word)
        if occurrences > 1:
            repeated[word] = occurrences
    if repeated == {}:
        repeated = {'Повторяющихся слов' : '0'}
    return repeated
def display(several):
    """Print a heading and then one centred "key value" row per entry of *several*."""
    print('Следующее предложение: ')
    for keyword, amount in several.items():
        print('{:^10} {:^10}'.format(keyword, amount))
allsent = preproc()
for sentence in allsent:
arr = re.split(r' ', sentence)
several = count(arr)
display(several)
import re
# Read corp.xml as a list of lines and count lines until a marker is met.
# Returns (count as text followed by a newline, list of all lines).
# NOTE(review): the marker in `if '' not in line` is an empty string —
# presumably an XML tag stripped during extraction. As written the empty
# string is contained in every line, so the loop stops at once with c == 0.
def opencount():
with open('corp.xml', 'r', encoding = 'utf-8') as f:
text = f.readlines()
c = 0
for line in text:
# NOTE(review): the result of strip() is discarded, so this line has no effect.
line.strip('\s')
if '' not in line:
c += 1
else:
break
numheader = str(c) + '\n'
return numheader, text
# Build a dictionary mapping each value collected in `allmorphs` to the number
# of times it occurs, and return it.
# NOTE(review): the line starting with `if '')` is not valid Python — the code
# that filled `morph1` was evidently lost together with stripped XML markup.
# TODO restore from the original script.
def create_dict(text):
newdict = {}
typearr = []
allmorphs = []
for line in text:
if '')
allmorphs.append(morph1[0])
# Collect the distinct values in order of first appearance.
keys = []
for el in allmorphs:
if el not in keys:
keys.append(el)
for key in keys:
num = allmorphs.count(key)
newdict[key] = num
return newdict
def writenum(c, newdict, neutrum, csvarr):
    """Write the report to ``result.txt``.

    The file gets, in order: the text *c*, one "key:count" line per entry of
    *newdict*, the items of *neutrum* each followed by ", " on one line, and
    the strings of *csvarr* written as they are.
    """
    with open('result.txt', 'w', encoding = 'utf-8') as out:
        out.write(c)
        for key, freq in newdict.items():
            out.write(str(key) + ':' + str(freq) + '\n')
        out.write(''.join(el + ', ' for el in neutrum) + '\n')
        for el in csvarr:
            out.write(el)
    print('Записано.')
# Collect a text fragment from every line that matches the pattern 'type="f.h'
# and return the fragments as a list.
# NOTE(review): `form1[1].split('')` raises ValueError (empty separator); the
# separator was presumably an XML closing tag stripped during extraction.
def search_pro_n(text):
neutrum = []
for line in text:
q = re.search('type="f.h', line)
if q != None:
# Take what follows the first '">' in the line.
form1 = line.split('">')
form2 = form1[1].split('')
neutrum.append(form2[0])
return neutrum
# Read corp.xml, cut out one section of it, rewrite its lines with re.sub and
# return them as a list.
# NOTE(review): every separator/pattern here is an empty string and the line
# starting with `if '', ', ', line)` is not valid Python — the XML tags the
# code searched for were evidently stripped. TODO restore from the original.
def wholecorpora():
csvarr = []
with open('corp.xml', 'r', encoding = 'utf-8') as f:
text = f.read()
arr = text.split('')
arr1 = arr[1].split('')
arrlines = arr1[0].split('\n')
for line in arrlines:
if '', ', ', line)
line = re.sub('', '\n', line)
csvarr.append(line)
return csvarr
c, text = opencount()
newdict = create_dict(text)
neutrum = search_pro_n(text)
csvarr = wholecorpora()
writenum(c, newdict, neutrum, csvarr)
# Print the part before the first long dash of every line of the quotes file,
# as long as that part is shorter than ten words.
# ``with`` closes the file; the old handle was never closed. The unused counter
# and the always-true ``len(arr) > 0`` test are gone.
with open("цитаты1.txt", "r", encoding="utf-8") as file:
    for line in file:
        arr = line.split('—')
        ar = arr[0].split()
        if len(ar) < 10:
            print (arr[0])
# Read a, b, c and report which of two conditions hold:
# condition 1 is a * b == c, condition 4 is a * c + b == 0.
a = float(input ('Введите a:'))
b = float(input ('Введите b:'))
c = float(input ('Введите c:'))
U1 = a * b == c
U4 = a * c + b == 0
if U1:
    print ('Выполняется условие 1')
if U4:
    print ('Выполняется условие 4')
if U1 and U4:
    print ('Выполняются условия 1 и 4')
elif not U1 and not U4:
    # The old code wrapped this print in braces, which built a throw-away set
    # literal instead of marking a block.
    print ('НЕ выполняется ни одно из условий 1 или 4')
print ('Для завешения нажмите ENTER')
ENTER = input('')
import re
def get_text(fn):
    """Return the lines of the UTF-8 file *fn* as a list (line ends kept)."""
    with open(fn, 'r', encoding = "utf-8") as f:
        return list(f)
# Search the lines of the saved Wikipedia page for a "UTC±hh:mm" value, print
# it and return it.
# NOTE(review): the regex starts with ']*?>' — its beginning (presumably an
# HTML tag) was stripped during extraction; TODO restore.
# NOTE(review): indentation is lost, so it is unclear whether the `return`
# belongs inside the `if`; at function level `m` would be the match for the
# last line only, and `m.group(1)` fails when that match is None.
def main():
text = get_text('Санкт-Петербург — Википедия.html')
reg = ']*?>(UTC[+-]?\d{1,2}:?\d{0,2})'
for ti in text:
m = re.search(reg, ti)
if m != None:
print(m.group(1))
return m.group(1)
def record():
    """Write the time zone returned by ``main()`` to ``result.txt``."""
    r = main()
    # Explicit UTF-8: the text is Cyrillic and the platform default encoding
    # may not be able to store it. ``with`` closes the file on errors too.
    with open("result.txt", "w", encoding="utf-8") as f:
        f.write("Часовой пояс - " + r)
record()
def open_text():
    """Return the lower-cased words of the novel with edge punctuation stripped."""
    with open('Austen Jane. Pride and Prejudice.txt', "r", encoding = "utf-8") as f:
        tokens = f.read().lower().split()
    return [token.strip('.,!?-;:“"”''') for token in tokens]
def isness(word):
    """Return 1 when *word* is longer than four characters and ends in 'ness', else 0."""
    return 1 if len(word) > 4 and word[-4:] == 'ness' else 0
def AddInList(word, List, Qn):
    """Count *word*: bump its counter in *Qn*, or append it to *List* with count 1.

    *List* and *Qn* are parallel lists and are modified in place.
    """
    found = False
    for idx, known in enumerate(List):
        if known == word:
            Qn[idx] += 1
            found = True
    if not found:
        List.append(word)
        Qn.append(1)
# Count the distinct words ending in -ness and report the most frequent one.
Inarr = open_text()
List = list()
Qn = list()
for word in Inarr:
    if isness(word) == 1:
        AddInList(word, List, Qn)
print('Количество разных сущ. с суффиксом -ness равно: ' + str(len(List)))
Max = 0
Ind = 0
# Without any -ness word there is nothing to report; indexing the empty list
# used to raise IndexError.
if List:
    Max = max(Qn)
    Ind = Qn.index(Max)  # first word with the top count, as before
    print('Максимальную частотность имеет слово: ' + List[Ind] + ', с частотностью: ' + str(Qn[Ind]))
import os
import re
def papka():
    """Print how many files in '.' have no digit in their name and return those names."""
    folder = []
    for f in os.listdir('.'):
        if re.search(r'[0-9]+', f):
            continue
        if os.path.isfile(f):
            folder.append(f)
    print(len(folder))
    return folder
papka()
def dop():
    """Return the distinct name prefixes that precede a dot, over all entries of '.'.

    A name with several dots contributes one prefix per dot.
    """
    prefixes = []
    for name in os.listdir('.'):
        for pos, ch in enumerate(name):
            if ch == '.' and name[0:pos] not in prefixes:
                prefixes.append(name[0:pos])
    return prefixes
print(dop())
# Print the letters п, о, е that stand at even (0-based) positions of a word.
word = input ("Введите слово на кириллице:")
for ch in word[::2]:
    if ch in ('п', 'о', 'е'):
        print (ch)
print ("Для завершения работы нажмите ENTER")
ENTER = input ('')
import re
def open_text():
    """Return the whole saved Wikipedia page about vikings as one string."""
    with open('Викинги — Википедия.html', "r", encoding = "utf-8") as page:
        return page.read()
def replacement():
    """Return the page text with every form of "викинг" replaced by the same form of "бурундук"."""
    # (?!\w) checks the end of the word without consuming anything. The former
    # trailing [^\w] swallowed the following space or punctuation mark and
    # could not match a word at the very end of the text.
    ending = r'((и|у|е|а(х|м(и)?)?)|о(в|м)?)?(?!\w)'
    result1 = re.sub('викинг' + ending, r'бурундук\1', open_text())
    result2 = re.sub('Викинг' + ending, r'Бурундук\1', result1)
    return result2
def record():
    """Write the converted page to ``result.txt`` and return the (closed) file object."""
    r = replacement()
    # ``with`` closes the file even if the write fails.
    with open("result.txt", "w", encoding = "utf-8") as f:
        f.write(r)
    return f
record()
import re
import os
import csv
def first():
    """Append "<name>\t<number of pattern hits>" to ``new_text.txt`` for every .xhtml file in '.'."""
    reg = ' '
    for name in os.listdir('.'):
        if not name.endswith('.xhtml'):
            continue
        with open(os.path.join('.', name), 'r', encoding = 'utf-8') as src:
            text = src.read()
        hits = list(re.findall(reg, text))
        with open('new_text.txt', 'a', encoding = 'utf-8') as out:
            out.write(name+'\t'+str(len(hits)) + '\n')
first()
# For every entry of the current directory, read it as UTF-8 text and append
# rows to table.csv.
# NOTE(review): all three regex patterns are empty strings — presumably tags
# stripped during extraction; TODO restore.
# NOTE(review): `i+','+re.search('', text)` adds a str to a match object and
# raises TypeError as written.
def second():
for i in os.listdir('.'):
reg = ''
with open(os.path.join('.', i), 'r', encoding = 'utf-8') as t:
text = t.read()
for t in re.findall(reg, text):
if re.search('', text):
with open('table.csv', 'a', encoding = 'utf-8') as f:
f.write(i+','+re.search('', text))
second()
import random
def read_words(filename):
    """Return the ', '-separated words from all lines of the UTF-8 file *filename*."""
    arr = []
    # ``with`` closes the file even when reading fails.
    with open(filename, "r", encoding = "utf-8") as file:
        for line in file:
            line = line.strip()
            if line:
                # A blank line used to add an empty "word" to the list.
                arr += line.split(', ')
    return arr
def verb(number):
    """Return a random verb: singular when *number* is 's', plural otherwise."""
    source = "singular_verbs.txt" if number == 's' else "plural_verbs.txt"
    return random.choice(read_words(source))


def noun(number):
    """Return a random noun: singular when *number* is 's', plural otherwise."""
    source = "singular_nouns.txt" if number == 's' else "plural_nouns.txt"
    return random.choice(read_words(source))
def clinoun():
    """Return a random entry of ``clitic_noun.txt``."""
    options = read_words("clitic_noun.txt")
    return random.choice(options)


def adverb():
    """Return a random entry of ``adverb.txt``."""
    options = read_words("adverb.txt")
    return random.choice(options)


def punctuation():
    """Return a random entry of ``punctuation.txt``."""
    options = read_words("punctuation.txt")
    return random.choice(options)


def verse1():
    """Build a line: clitic noun, singular noun, adverb, singular verb, punctuation."""
    parts = [clinoun(), noun('s'), adverb(), verb('s')]
    return ' '.join(parts) + punctuation()


def verse2():
    """Build a line: plural noun, plural verb, adverb, clitic noun, punctuation."""
    parts = [noun('pl'), verb('pl'), adverb(), clinoun()]
    return ' '.join(parts) + punctuation()


def verse3():
    """Build a line: singular noun, adverb, clitic noun, singular verb, punctuation."""
    parts = [noun('s'), adverb(), clinoun(), verb('s')]
    return ' '.join(parts) + punctuation()
def make_verse():
    """Return a line built by one of the three verse templates, picked at random."""
    verse = random.choice([1,2,3])
    builders = {1: verse1, 2: verse2}
    return builders.get(verse, verse3)()
for n in range(4):
print(make_verse())
def open_text():
    """Return the contents of ``green.txt`` split on full stops."""
    with open('green.txt', "r", encoding = "utf-8") as src:
        return src.read().split('.')
def deli():
    """Return the sentences from ``open_text()`` with punctuation characters removed."""
    txt = open_text()
    for idx in range(len(txt)):
        cleaned = txt[idx]
        for s in '.,!?-;:“"”''()«»–':
            cleaned = cleaned.replace(s, "")
        txt[idx] = cleaned
    return txt
def des():
    """Return the cleaned sentences that contain more than ten words."""
    dlina = []
    for sentence in deli():
        if len(sentence.split()) > 10:
            dlina.append(sentence)
    return dlina


def big():
    """Return every word starting with an upper-case letter from the long sentences."""
    f = []
    for sentence in des():
        for x in sentence.split():
            if x[0].isupper():
                f.append(x)
    return f
print (big())
# Print the ratio between the longest and the shortest non-empty line of text.txt.
# ``with`` closes the file (it used to stay open).
with open("text.txt", "r", encoding = "utf-8") as file:
    # The line break is not part of the line's length; with it included the
    # old ``lp > 0`` test could never skip an empty line.
    lengths = [len(line.rstrip('\n')) for line in file]
lengths = [lp for lp in lengths if lp > 0]
if lengths:
    lmin = min(lengths)
    lmax = max(lengths)
    print (lmax / lmin)
def open_text_1():
    """Return the number of lines in ``islandcorp.xml`` before its first empty line."""
    Line = 0
    with open('islandcorp.xml', "r", encoding = "utf-8") as f:
        for row in f:
            if row == '\n':
                break
            Line += 1
    return Line
def record():
    """Write the header line count from ``open_text_1()`` to ``result1.txt``."""
    with open("result1.txt","w", encoding = "utf-8") as out:
        header_lines = open_text_1()
        out.write(str(header_lines))
import re
def keys():
    """Count how often each regex hit occurs in ``islandcorp.xml``; return {hit: count}."""
    with open('islandcorp.xml', "r", encoding = "utf-8") as f:
        text = f.read()
    Dic = {}
    reg = '.*?'
    for hit in re.findall(reg, text):
        Dic[hit] = Dic.get(hit, 0) + 1
    return Dic
def record1():
    """Write the counts from ``keys()`` to ``result2.txt`` as "key,count" lines."""
    with open("result2.txt","w", encoding = "utf-8") as out:
        for key, amount in keys().items():
            out.write(key + ',' + str(amount)+ '\n')
record()
record1()
import os
import re
def main():
    """Count the directories under '.' whose names consist only of Cyrillic letters and spaces."""
    Sum = 0
    for _root, dirs, _files in os.walk('.'):
        for d in dirs:
            if all(re.search(r'[а-яёЁ А-Я]+', ch) is not None for ch in d):
                Sum += 1
    return Sum
print(main())
# Read words until an empty line is entered, then print those longer than five letters.
arr = []
while True:
    word = input("Введите слово: ")
    if not word:
        break
    arr.append(word)
for entry in arr:
    if len(entry) > 5:
        print (entry)
print ("Для завершения работы нажмите ENTER")
ENTER = input ('')
import re
def open_text():
    """Return the lower-cased words of ``txtfind.txt`` with edge punctuation stripped."""
    with open('txtfind.txt', "r", encoding = "utf-8") as f:
        tokens = f.read().lower().split()
    return [token.strip('.,!?-;:“"”''') for token in tokens]
def find_in_text():
    """Return the words from ``open_text()`` that the verb-form regex finds a hit in."""
    regex = '\W?(на(((й((д(у(т(ся)?)?|ёшь(ся)?|ёт(ся|е(сь)?)?|ём(ся)?|и|ите(сь)?|я|енный|ены))|ти(сь)?)))|(ш(ёл(ся)?|л(а|и|о)(сь)?|едш(и|(ий|ая|ее)(ся)?)))))\W?'
    return [word for word in open_text() if re.search(regex, word) is not None]
# Print every found word once, in order of first appearance.
uList = list()
List = find_in_text()
for found in List:
    if found not in uList:
        print(found)
        uList.append(found)
# Print every prefix of the word, from the empty one up to the whole word.
word = ('abracadabra')
for i in range(len(word) + 1):
    print (word[0:i])
import random
def get_words(fn):
    """Read the riddle file *fn* and return {word: [collocations with the word masked]}.

    Each line holds "word,collocation,collocation,…"; inside the collocations
    the word is replaced by as many dots as it has letters.
    """
    words = {}
    # Explicit UTF-8 keeps reading independent of the platform default.
    with open(fn, 'r', encoding='utf-8') as fd:
        for line in fd:
            # Drop the line break (it used to end up in the last collocation)
            # and skip lines without a comma, which cannot be unpacked.
            line = line.strip()
            if ',' not in line:
                continue
            word, collocations = line.split(',', 1)
            words[word] = collocations.replace(word, '.'*len(word)).split(',')
    return words
def ask_riddle(words_dict):
    """Show a random masked collocation and read a guess.

    Returns (hidden word, whether the guess equals it).
    """
    rnd_word = random.choice(list(words_dict.keys()))
    options = list(words_dict[rnd_word])
    print(random.choice(options))
    guess = input('Пропущенное слово:')
    return rnd_word, guess == rnd_word
def main():
    """Play one riddle from ``f3.csv``, print the verdict and return (word, answer)."""
    word, answer = ask_riddle(get_words('f3.csv'))
    if answer:
        verdict = 'И это правильный ответ!'
    else:
        verdict = 'Вы ошиблись, правильный ответ: '+ word
    print(verdict)
    return word, answer
main()
import re
def main():
    """Return the text of ``Викинги.html`` with forms of "викинг" replaced by forms of "бурундук"."""
    # (?!\w) only checks that the word ends here. The earlier trailing [^\w]
    # consumed the following space or punctuation mark, so it vanished from
    # the output, and a word at the very end of the text was missed.
    ending = r"(а(ми?|х)?|о(в|м)|у|е|и)?(?!\w)"
    with open("Викинги.html","r",encoding="utf-8") as f:
        s = f.read()
    s = re.sub("в(и|и́)кинг" + ending, r"бурундук\2", s)
    s = re.sub("В(и|и́)кинг" + ending, r"Бурундук\2", s)
    return s
def record():
    """Write the converted text from ``main()`` to ``results.txt``."""
    s = main()
    # ``with`` closes the file even when the write fails.
    with open("results.txt","w",encoding='utf-8') as f:
        f.write(s)
record()
# Read three integers and report whether a*b equals c and whether c solves a*x+b=0.
a = int(input('input a number1: '))
b = int(input('input a number2: '))
c = int(input('input a number3: '))
print('\na=',a,'\nb=',b,'\nc=',c)
is_product = a*b == c
is_root = a*c+b == 0
print('\nПроизведение чисел a и b равно числу c' if is_product else '\nПроизведение чисел a и b не равно c')
print('Число c является решением линейного уравнения a*x+b=0' if is_root else 'Число c не является решением линейного уравнения a*x+b=0')
print('\nЧтобы завершить программу, нажмите Enter')
ENTER = input('')
import random
def _read_tokens(path):
    """Return the whitespace-separated tokens of the UTF-8 file *path*."""
    # ``with`` closes the file; the handles were never closed before.
    with open(path, "r", encoding="utf-8") as src:
        return src.read().split()


def phrase():
    """Return a random token of ``plus1.txt`` and one of ``plus2.txt``, space-joined."""
    p1 = random.choice(_read_tokens("plus1.txt"))
    p2 = random.choice(_read_tokens("plus2.txt"))
    return p1 + ' ' + p2


def adjective():
    """Return a random token of ``adj.txt``."""
    return random.choice(_read_tokens("adj.txt"))


def verb():
    """Return a random token of ``verb.txt``."""
    return random.choice(_read_tokens("verb.txt"))


def noun(num):
    """Return a random noun: from ``pl.txt`` for 'pl', ``ind.txt`` for 'ind', else ``sg.txt``."""
    # Only the file that is actually needed is read.
    sources = {'pl': "pl.txt", 'ind': "ind.txt"}
    return random.choice(_read_tokens(sources.get(num, "sg.txt")))
def punctuation():
    """Return a random end-of-line punctuation mark."""
    return random.choice([".", "?", "!", "...",";"])
def verse1():
    """Build a line: phrase, singular noun, plural noun, punctuation."""
    parts = [phrase(), noun("sg"), noun("pl")]
    return ' '.join(parts) + punctuation()


def verse2():
    """Build a line: "verb, verb", indeclinable noun, punctuation."""
    head = verb() + ', ' + verb()
    return head + ' ' + noun("ind") + punctuation()


def verse3():
    """Build a line: singular noun, adjective, plural noun, punctuation."""
    parts = [noun("sg"), adjective(), noun("pl")]
    return ' '.join(parts) + punctuation()


def doit():
    """Return a line built by one of the three verse templates, picked at random."""
    verse = random.choice([1,2,3])
    builders = {1: verse1, 2: verse2}
    return builders.get(verse, verse3)()
for n in range(4):
print(doit())
import os
import re
def texts(name):
    """Return all non-empty lines of the file *name* (the hits of the pattern '.+')."""
    # ``with`` closes the file even when reading fails.
    with open(name, 'r') as f:
        text = f.read()
    return re.findall('.+', text)
def resutls(s,fname):
    """Write the string *s* to the file *fname* (UTF-8, overwriting it)."""
    with open(fname,"w",encoding = "utf-8") as f:
        f.write(s)


# The caller in this script spells the name "results"; keep both spellings
# usable without renaming the original function.
results = resutls
def words():
    """Write "<file>\t<number of non-empty lines>" for every .xhtml file under '.' to ``result1.txt``."""
    rows = []
    for roots, dirs, files in os.walk('.'):
        for file in files:
            if file.endswith('.xhtml'):
                rows.append(file + "\t" + str(len(texts(os.path.join(roots,file)))) + "\n")
    # The writer defined in this script is spelled ``resutls``; calling
    # ``results`` raised NameError.
    resutls("".join(rows),"result1.txt")


# __name__ is '__main__' when the file is run as a script; the comparison with
# '__words__' could never be true, so the report was never produced.
if __name__ == '__main__':
    words()
import os
def main():
    """Count the directories under '.' whose names use only the listed Cyrillic letters."""
    allowed = "йцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪЭЖДЛОРПАВЫФЯЧСМИТЬБЮ"
    num = 0
    for _root, dirs, _files in os.walk('.'):
        for d in dirs:
            if all(i in allowed for i in d):
                num += 1
    return num
if __name__ == '__main__':
print(main())
import random
# Read 1.csv, split it on commas and return the entries with trailing
# characters removed by rstrip.
# NOTE(review): the string literal passed to rstrip is cut off (no closing
# quote or parenthesis), so this block is not valid Python as it stands; the
# rest of the literal was evidently lost during extraction. TODO restore.
# NOTE(review): the file object `f` is never closed here.
def words():
f = open("1.csv","r",encoding="utf-8")
a = f.read().split(',')
m = []
for n in a:
b = n.rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$
m.append(b)
return m
def d():
    """Return {first token: second token} for every entry from ``words()``."""
    pairs = {}
    for entry in words():
        tokens = entry.split()
        pairs[tokens[0]] = tokens[1]
    return pairs
def rand():
    """Return a random key of the dictionary built by ``d()``."""
    # d() already reads the word file; the extra words() call whose result was
    # never used has been dropped.
    return random.choice(list(d()))
def attempt():
    """Show a random key with one dot per letter of its value, read a guess, return the verdict."""
    di = d()
    v = rand()
    print(v,'.'*len(di[v]))
    s = input()
    return "you win" if s == di[v] else "you lose"
print(attempt())
# Print the letters о, п, е that stand at odd 1-based positions of the word.
s = input("введите слово: ")
for pos, letter in enumerate(s):
    if pos % 2 == 0 and letter in ('о', 'п', 'е'):
        print(letter)
print('\nЧтобы завершить программу, нажмите Enter')
ENTER = input('')
import re
def text():
    """Return the lines of ``Санкт-Петербург.html`` as a list (line ends kept)."""
    # ``with`` closes the file; it used to stay open.
    with open("Санкт-Петербург.html","r",encoding="utf-8") as f:
        return f.readlines()
def main():
    """Return the text between '>' and '<' of the last token that matches the time-zone pattern.

    Returns an empty string when no token matches.
    """
    regex = '"[A-Z][A-Z][A-Z](\+|-)?[0-9][0-9]?:?[0-90-9]?"'
    s = ''
    for line in text():
        for token in line.split():
            if re.search(regex, token) is not None:
                s = token[token.find('>') + 1:token.find('<')]
    return s
def record():
    """Write the time zone found by ``main()`` to ``result.txt``."""
    s=main()
    # Explicit UTF-8: the text is Cyrillic and the platform default encoding
    # may not be able to store it. ``with`` closes the file on errors too.
    with open("result.txt","w",encoding="utf-8") as f:
        f.write("Часовой пояс - "+s)
record()
def names():
    """Return the names of all entries in the current directory."""
    import os
    return os.listdir('.')
def main():
    """Print how many directory entries have no digit in the name, and the distinct names cut at the first dot."""
    newm = []
    num = 0
    for entry in names():
        if not any(ch in '1234567890' for ch in entry):
            num += 1
        # NOTE(review): the original indentation is lost; the dot handling is
        # taken to apply to every entry, not only to the digit-free ones.
        stem = entry[:entry.index('.')] if '.' in entry else entry
        if stem not in newm:
            newm.append(stem)
    print('num = {}'.format(num))
    print(newm)
if __name__ == '__main__':
main()
# Read ness.txt, split it on whitespace and return the lower-cased words with
# trailing characters removed by rstrip.
# NOTE(review): the string literal passed to rstrip is cut off (no closing
# quote or parenthesis), so this block is not valid Python as it stands; the
# rest of the literal was evidently lost during extraction. TODO restore.
# NOTE(review): the file object `f` is never closed here.
def text():
f = open("ness.txt","r",encoding="utf-8")
a = f.read().split()
m = []
for n in a:
b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$
m.append(b)
return m
def ness(m):
    """Return the distinct words of *m* that end in 'ness', in order of first appearance."""
    mas = []
    seen = set()
    for word in m:
        # Whole-word membership: the old substring test against a joined
        # string dropped e.g. "happiness" once "unhappiness" had been seen.
        if word[-4:] == 'ness' and word not in seen:
            seen.add(word)
            mas.append(word)
    return mas
def numb():
    """Return the number of distinct -ness words in the text."""
    return len(ness(text()))
def main():
    """Return the most frequent -ness word(s) of the text, each preceded by a space.

    Returns an empty string when the text has no -ness word.
    """
    from collections import Counter

    # Count whole words. The old ``str.count`` on a joined string also counted
    # a word inside longer words ("happiness" inside "unhappiness").
    freq = Counter(word for word in text() if word[-4:] == 'ness')
    if not freq:
        return ""
    maxi = max(freq.values())
    return "".join(" " + word for word in freq if freq[word] == maxi)
print("Количество разных слов на -ness =",numb(),"\nСамое(ые) частотное(ые) -",main())
import re
# Read vim4.txt, split it into sentences on '.', '?' and '!' and build a list
# named `lines` from them.
# NOTE(review): the list comprehension is cut off in the middle of a string
# literal, so this block is not valid Python as it stands; the rest of the
# expression was evidently lost during extraction. TODO restore.
# NOTE(review): the file object `f` is never closed here.
def lines():
f = open('vim4.txt','r',encoding='utf-8')
a = f.read()
c = re.split(r'[.?!]',a)
lines = [' '.join([word.strip('.,<>/?""-=_+''""[]{}()*&^%$
return lines
def main():
    """Return the title-cased words from every sentence that has more than ten words."""
    results = []
    for line in lines():
        tokens = line.split()
        if len(tokens) > 10:
            results.extend(w for w in tokens if w.istitle() == True)
    return results
if __name__ == '__main__':
print(main())
import re
# Read portrait.txt, split it on whitespace and return the lower-cased words
# with trailing characters removed by rstrip.
# NOTE(review): the string literal passed to rstrip is cut off (no closing
# quote or parenthesis), so this block is not valid Python as it stands; the
# rest of the literal was evidently lost during extraction. TODO restore.
# NOTE(review): the file object `f` is never closed here.
def text():
f = open("portrait.txt","r",encoding="utf-8")
a = f.read().split()
m = []
for n in a:
b = n.lower().rstrip('.,<>/?""1234567890-=_+''[]{}()*&^%$
m.append(b)
return m
def main():
    """Return the matching word forms from the text as one space-terminated string.

    A word is added when the regex finds a hit in it, every character of the
    word also occurs in the regex text, and the word is not yet a substring
    of the result.
    """
    regex = 'на(й|ш(е|ё)?)(т|д|л)(ш|енн?)?(а?я?|(и|о|ы|(е|ё)|ую?)?(т|шь)?(ся)?(м(у|и)?|го|е|й|х)?)?'
    s = ''
    for word in text():
        if re.search(regex, word) is None:
            continue
        if all(ch in regex for ch in word) and word not in s:
            s = s + word + ' '
    return s
print(main())
import re
def opp():
    """Return the number of lines in ``it.xml``."""
    # ``with`` closes the file even when reading fails.
    with open("it.xml","r",encoding="utf-8") as f:
        return sum(1 for _ in f)
def record1():
    """Write the line count from ``opp()`` to ``result1.txt``."""
    # ``with`` closes the file even when opp() or the write fails.
    with open('result1.txt','w',encoding='utf-8') as f:
        f.write(str(opp()))
record1()
def dic():
    """Count the ``type`` values of the lines in ``it.xml`` that carry a lemma.

    Returns {type value: number of lines}. The value is the text between
    'type="' and the last double quote of the line.
    """
    d = {}
    regex1 = 'lemma="'
    regex2 = 'type="[a-zþ0-9]+"'
    # ``with`` closes the file; it was never closed before.
    with open("it.xml","r",encoding="utf-8") as f:
        for line in f:
            if re.search(regex1,line) is None or re.search(regex2,line) is None:
                continue
            p1 = line.rfind('"')
            p2 = line.find('type=')
            s = line[p2+6:p1]
            d[s] = d.get(s, 0) + 1
    return d
def record2():
    """Append every type value found by ``dic()`` to ``result1.txt``, one per line."""
    d = dic()
    # ``with`` closes the file even when a write fails.
    with open('result1.txt','a',encoding='utf-8') as f:
        for i in d:
            f.write('\n'+i)
record2()
def plur():
    """Count the ``type`` values of ``it.xml`` whose first character is 'l' and third is 'f'.

    Returns {type value: number of lines}, built like in ``dic()``.
    """
    d = {}
    regex1 = 'lemma="'
    regex2 = 'type="[a-zþ0-9]+"'
    # ``with`` closes the file; it was never closed before.
    with open("it.xml","r",encoding="utf-8") as f:
        for line in f:
            if re.search(regex1,line) is None or re.search(regex2,line) is None:
                continue
            p1 = line.rfind('"')
            p2 = line.find('type=')
            s = line[p2+6:p1]
            # Slices instead of s[0]/s[2]: a value shorter than three
            # characters used to raise IndexError.
            if s[0:1] == 'l' and s[2:3] == 'f':
                d[s] = d.get(s, 0) + 1
    return d
def record3():
    """Write the counts from ``plur()`` to the file ``result2`` as "type - count" lines."""
    d = plur()
    # ``with`` closes the file even when a write fails.
    with open('result2','w',encoding='utf-8') as f:
        for i in d:
            f.write(i+' - '+str(d[i])+'\n')
record3()
# Print the ratio of the longest to the shortest line length in new1.txt
# (lengths include the line break; lines that are only a line break are
# skipped, except that the first line always seeds both values).
# ``with`` closes the file even on errors.
with open("new1.txt","r",encoding = "utf-8") as f:
    mx=mn=len(f.readline())
    for line in f:
        if line != "\n":
            if len(line) > mx:
                mx = len(line)
            if len(line) < mn:
                mn = len(line)
# An empty file gives mn == 0; there is no ratio to print then.
if mn:
    print(mx/mn)
# Print every line of text1.txt whose third space-separated field is "союз".
with open("text1.txt","r",encoding = "utf-8") as f:
    for line in f:
        sym=line.split(" ")
        # Lines with fewer than three fields used to raise IndexError.
        if len(sym) > 2 and sym[2]=="союз":
            print(line)
# Read words from the user until an empty line, then print the dictionary
# entry from text1.txt for each of them.
s = input("Введите слово: ")
m = []
while s!='':
    m.append(s)
    s=input("Введите слово: ")
# Load the dictionary once. Iterating the open file inside the word loop
# exhausted it on the first word, so later words were never looked up.
entries = {}
with open("text1.txt","r",encoding = "utf-8") as f:
    for line in f:
        sym = line.split(" ")
        entries.setdefault(sym[0], sym[1:])
for i in m:
    if i in entries:
        print(i,entries[i])
    else:
        print(i+" - в словаре нет такого слова")
# Print the first field of every text1.txt line whose fifth and sixth fields
# are "ед" and "жен", then print the sum of the last fields of those lines.
s=0
with open("text1.txt","r",encoding = "utf-8") as f:
    for line in f:
        sym=line.split(" ")
        # Lines with fewer than six fields used to raise IndexError.
        if len(sym) > 5 and sym[4]=="ед" and sym[5]=="жен":
            print(sym[0]+",")
            s=s+float(sym[-1])
print(s)
# Read words until an empty line is entered, then print those longer than five letters.
m = []
while True:
    s = input('введите слово: ')
    if s == '':
        break
    m.append(s)
for word in (w for w in m if len(w) > 5):
    print(word)
print('Чтобы завершить программу, нажмите ENTER')
ENTER = input('')
# Print the word growing letter by letter: first letter, first two, and so on.
word = input('введите слово: ')
newword = ''
for end in range(1, len(word) + 1):
    newword = word[:end]
    print(newword)
import re
import os
def countsent(file):
    """Return the number of lines of *file* in which the pattern is found.

    NOTE(review): the pattern is an empty string (presumably a tag stripped
    during extraction), which is found in every line.
    """
    # ``with`` closes the file; it was never closed before.
    with open (file,'r') as s:
        return sum(1 for line in s if re.search('',line))
def file_countsent():
    """Write "<file>\t<count from countsent>" for every file under 'news' to ``countsent.txt``."""
    # ``with`` closes (and flushes) the report; the handle was never closed.
    with open ('countsent.txt','w',encoding='utf-8') as cw:
        for root, dirs, files in os.walk('news'):
            for f in files:
                cw.write(f+'\t'+str(countsent(os.path.join(root, f)))+'\n')
# Extract the topic and the author from the text txt1 and return them as the
# list [author, topic].
# NOTE(review): both regex patterns are empty strings — presumably the tags
# they contained were stripped during extraction. An empty pattern has no
# group 1, so `.group(1)` raises an error as written. TODO restore.
def text_data(txt1):
topic = re.search(r'', txt1).group(1)
author = re.search(r'', txt1).group(1)
data = [author, topic]
return data
def csv(data, name):
    """Append one tab-separated row (data[2], data[0], data[1]) to the cp1251 file *name*."""
    with open(name, 'a', encoding='cp1251') as out:
        third, first, second = data[2], data[0], data[1]
        out.write(third+'\t'+first+'\t'+second+'\n')
def supertable():
    """Collect [author, topic, file name] for every file under 'news' and append the rows to ``supertable.csv``."""
    rows = []
    for root, dirs, files in os.walk('news'):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='cp1251') as m:
                txt = m.read()
            row = text_data(txt)
            row.append(f)
            rows.append(row)
    for row in rows:
        csv(row, 'supertable.csv')
file_countsent()
supertable()
import re
def openfile():
    """Ask for a file path and print how many lines precede the first blank line."""
    file1 = input('Введите путь к файлу: ')
    arr = []
    with open(file1, "r", encoding="utf-8") as f:
        for line in f.readlines():
            if line.strip() == '':
                break
            arr.append(line)
    print('Число строк заголовка', len(arr))
def dictionary():
    """Count the ``type="…"`` values of a file and save the counts to another file.

    Both paths are asked from the user. The output gets one
    "value<TAB>count" line per distinct type value.
    """
    file2 = input('Введите путь к файлу: ')
    with open(file2, "r", encoding="utf-8") as f:
        text = f.read()
    dictn = {}
    for i in re.findall(r'type="\w+">', text):
        i = i[6::].strip('">')
        dictn[i] = dictn.get(i, 0) + 1
    file3 = input('Введите путь к файлу, куда будет записана информация из словаря: ')
    # The output must be opened for writing ("r" made every write fail), and
    # str(key, value) is not a valid way to format two values.
    with open(file3, "w", encoding="utf-8") as f:
        for key in dictn:
            f.write('{}\t{}\n'.format(key, dictn[key]))
openfile()
dictionary()
# Guessing game: ugadaika.csv holds comma-separated "word,hint" pairs; show
# each hint and check the user's answer against the word.
with open('ugadaika.csv', 'r', encoding = 'utf-8') as f:
    # strip() keeps the final line break out of the last hint.
    words = f.read().strip().split(',')
# Pair the fields up; a dangling last field without a partner is ignored
# instead of raising IndexError.
dic = dict(zip(words[::2], words[1::2]))
print('Я хочу сыграть с тобой в одну игру... Какое слово я загадал? Количество точек равно количеству букв в слове.')
for key in dic:
    print(dic[key])
    b = input()
    if b == key:
        print('Молодчинка!!!')
    else:
        print ('Ты не очень умный, я загадал не это.')
# Print the ratio of the longest to the shortest line length in proga.txt.
with open('proga.txt', 'r', encoding='utf-8') as f:
    mylist = [len(line) for line in f.readlines()]
mini = maxi = mylist[0]
for size in mylist:
    mini = min(mini, size)
    maxi = max(maxi, size)
print(maxi/mini)
import random
def _pick_word(path):
    """Return a random whitespace-separated token of the UTF-8 file *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return random.choice(f.read().split())


# The eight pickers below differed only in the file name, so they share one
# helper; the list each of them created and immediately overwrote is gone.
def adj():
    """Return a random token of ``adj.txt``."""
    return _pick_word('adj.txt')


def Petya():
    """Return a random token of ``nouns_like_Petya.txt``."""
    return _pick_word('nouns_like_Petya.txt')


def kustik():
    """Return a random token of ``nouns_like_kustik.txt``."""
    return _pick_word('nouns_like_kustik.txt')


def prep():
    """Return a random token of ``prep.txt``."""
    return _pick_word('prep.txt')


def adjfem():
    """Return a random token of ``adjfem.txt``."""
    return _pick_word('adjfem.txt')


def nounfem():
    """Return a random token of ``nounfem.txt``."""
    return _pick_word('nounfem.txt')


def verb():
    """Return a random token of ``verbpf.txt``."""
    return _pick_word('verbpf.txt')


def punct():
    """Return a random token of ``punct.txt``."""
    return _pick_word('punct.txt')
def verse1():
    """Build a line: adjective, Petya-noun, verb, kustik-noun, punctuation."""
    parts = [adj(), Petya(), verb(), kustik()]
    return ' '.join(parts) + punct()


def verse2():
    """Build a line: preposition, feminine adjective, feminine noun, punctuation."""
    parts = [prep(), adjfem(), nounfem()]
    return ' '.join(parts) + punct()


def verse3():
    """Build a line: adjective, kustik-noun, verb, Petya-noun, punctuation."""
    parts = [adj(), kustik(), verb(), Petya()]
    return ' '.join(parts) + punct()


def verse4():
    """Build a line: Petya-noun, verb, feminine noun, punctuation."""
    parts = [Petya(), verb(), nounfem()]
    return ' '.join(parts) + punct()


def make_verse():
    """Return a line built by one of the four verse templates, picked at random."""
    verse = random.choice([1,2,3,4])
    builders = {1: verse1, 2: verse2, 3: verse3}
    return builders.get(verse, verse4)()
for n in range(4):
print(make_verse())
import os
import re
# Split the entries of the current directory by whether the name contains a
# digit and print how many contain none.
entries = os.listdir('.')
num = [f for f in entries if re.search('[1234567890]', f)]
nonum = [f for f in entries if not re.search('[1234567890]', f)]
print('Файлов, не содержащих цифр в названии: ', len(nonum))
# Read three integers and report whether c is the product of a and b and
# whether c solves the equation a*x + b = 0.
print('Введите число a и нажмите Enter')
a = int(input())
print('Введите число b и нажмите Enter')
b = int(input())
print('Введите число c и нажмите Enter')
c = int(input())
is_product = a*b == c
if is_product:
    print(c ,'является произведением', a,' и ', b)
else:
    print(c ,' не является произведением', a,' и ', b)
is_root = c*a == (-1)*b
if is_root:
    print(c,'является решением линейного уравнения', a,'x +',b,'= 0')
else:
    print(c,'не является решением линейного уравнения', a,'x +',b,'= 0')
# Print the letters о, п, е that stand at odd 1-based positions of the input.
a = input()
# The position is counted with enumerate; the no-op ``int (b)`` statement is
# gone and the test uses logical ``and`` instead of bitwise ``&``.
for b, i in enumerate(a, start=1):
    if b % 2 and i in ('о', 'п', 'е'):
        print (i)
import re
def vikings():
    """Ask for the name of the article file and return its text."""
    wikifile = input('Время альтернативной истории! Введите имя файла со статьей про викингов: ')
    with open(wikifile, 'r', encoding = 'utf-8') as src:
        return src.read()
def change1(wikiarticle):
    """Return *wikiarticle* with every lower-case 'викинг' replaced by 'бурундук'."""
    return re.sub('викинг', 'бурундук', wikiarticle)


def change2(myarticle1):
    """Return *myarticle1* with every capitalised 'Викинг' replaced by 'Бурундук'."""
    return re.sub('Викинг', 'Бурундук', myarticle1)
def chimpunks(myarticle2):
    """Ask for an output file name and write *myarticle2* to it."""
    newfile = input('Введите имя файла, куда следует поместить измененную статью: ')
    with open(newfile, 'w', encoding = 'utf-8') as out:
        out.write(myarticle2)


def go():
    """Run the whole pipeline: read the article, replace both spellings, save the result."""
    article = vikings()
    article = change1(article)
    article = change2(article)
    chimpunks(article)
go()
import re
def findforms():
    """Print each distinct token of ``find.txt`` in which the verb-form regex finds a hit."""
    find = r"\bна(ш(ёл(ся)?|е(л(ся)?|дш(е(го(ся)?|м(ся|у(ся)?)?|е(ся)?|й(ся)?|ю(ся)?)|ую(ся)?|ая(ся)?|и(й(ся)?|е(ся)?|сь|м(и(ся)?)?|х(ся)?)?))|л(а(сь)?|о(сь)?|и(сь)?))|й(ти(сь)?|д(я(сь)?|у(сь|т(ся)?)?|ё(м(ся)?|шь(ся)?|т(ся|е(сь)?)?|нн(ую|ая|ы(х|е|й|ми?)|о(й|го|о|ю|му?)))|е(шь(ся)?|т(ся|е(сь)?)?|м(ся)?|н(а|о|ы|н((ую|ая|ы(х|е|й|ми?)|о(й|го|о|ю|му?))))?)|и(сь|те(сь)?)?)))\b"
    with open("find.txt", "r", encoding="utf-8") as f:
        tokens = f.read().split()
    arr = []
    for token in tokens:
        if re.search(find, token) is not None and token not in arr:
            arr.append(token)
    for item in arr:
        print(item)
findforms()
# Read words until an empty line is entered; print those longer than five
# letters, one per line.
a = []
while True:
    s = str(input("Введите слово "))
    if s == "":
        break
    if len(s) > 5:
        a.append(s)
print('\n'.join(a))
# Open isl.txt and print a counter.
# NOTE(review): as written this always prints 0 — read() consumes the whole
# file first, so readline() and the loop see nothing; `islenlines.append` is
# never called (no parentheses); the counter is only ever set to 0; and the
# marker in `if '' in line` is an empty string, presumably a tag stripped
# during extraction. The intended logic cannot be recovered from here.
def counting():
with open('isl.txt', 'r', encoding='utf-8') as islen:
islen.read()
str = islen.readline().replace('\n', '')
islenlines = []
islencount = 0
for line in islen:
islenlines.append
islencount = 0
if '' in line:
break
print(islencount)
counting()
# NOTE(review): this block is a damaged fragment. It starts as dictionary()
# but the line `if '(.*?)'` has no colon or body, and the last lines use
# `reg` and `content`, which are not defined here — they look like the tail
# of a different function (a link finder) whose beginning was lost during
# extraction. `islen` is not defined in this function either, and
# `islen.readline` is not called. TODO restore from the original scripts.
def dictionary():
lemmas = []
alsolemmas = []
str = islen.readline
for i in range(str):
if '(.*?)'
links = re.findall(reg, content)
return links
text = open_html('butterflies.html')
links = find_links(text)
for link in links[:20]:
print(link[1], '-->', link[0])
d = {"Россия":'Москва',
"Польша":'Варшава',
"США":'Вашингтон',
"Болгария":'София',
"Армения":'Ереван',
"Бразилия":'Бразилиа',
"Испания":'Москва'}
def delete_doubles(d):
    """Return a copy of *d* that keeps only the first key for every repeated value.

    d: dictionary to filter; it is not modified.
    """
    arr = []  # values already kept (a list, so unhashable values work too)
    new = {}
    for key in d:
        if d[key] in arr:
            continue
        # Store the value itself (the old code stored the key) and return the
        # new dictionary (the old code returned an undefined name).
        new[key] = d[key]
        arr.append(d[key])
    return new
delete_doubles(d)
import re
def open_html(fname):
    """Return the contents of the UTF-8 file *fname* as one string."""
    with open (fname, 'r', encoding='utf-8') as page:
        return page.read()
def tags(text):
    """Strip tags from *text*, collapse whitespace runs to one space and rename 'Илон Маск'."""
    without_tags = re.sub(r'<.*?>', r'', text)
    single_spaced = re.sub(r'\s+',r' ', without_tags)
    return re.sub(r'Илон Маск', r'Маленький котёнок', single_spaced)
print(tags(open_html('musk.html')))
import re
# Tell whether the entered text contains a form of the word "дождь".
rain = r"\b\дожд([ьюи]|е|ей|я(м|ми?)|ях|ём?)?\b"
s = input('Введите какое-нибудь слово: ')
m = re.search(rain, s)
verdict = 'Это слово является формой слова "дождь"!' if m != None else 'Нетушки!'
print(verdict)
import codecs
def open_file(file_name):
    """Return the words of *file_name*, lower-cased, without edge punctuation or digits.

    The file is read as UTF-8 with an optional byte-order mark. Tokens that
    consist only of stripped characters are left out.
    """
    words = []
    # ``with`` closes the file; it was never closed before.
    with codecs.open(file_name, 'r', 'utf-8-sig') as f:
        for line in f:
            words += line.strip().split()
    # Rebinding the loop variable did not change the list, so the raw tokens
    # were returned; build the cleaned list explicitly.
    cleaned = [word.strip(u'.,!?:;()\'\"1234567890').lower() for word in words]
    return [word for word in cleaned if word]
def bigramms(words):
    """Print and return the distinct bigrams of ``words``, each ending in CRLF.

    Bigrams come from ``create_list``; they are listed in order of first
    occurrence.  Occurrence counts are tallied but only the keys are used.
    """
    counts = {}
    for pair in create_list(words):
        counts[pair] = counts.get(pair, 0) + 1
    answer = []
    for pair in counts:
        answer.append(pair + '\r\n')
    print(answer)
    return answer
def create_list(words):
    """Return each word of ``words`` concatenated with its successor."""
    return [words[pos] + words[pos + 1] for pos in range(len(words) - 1)]
words = open_file('text.txt')
bigramms(words)
import re
with open('news.txt', 'r', encoding = 'utf-8') as f:
text = f.read()
punct = '[.,?!:;"\'—@–...«»
tabs = '[\t\n]'
def preprocessing(text):
    """Lower-case ``text``, drop punctuation and return its list of words.

    Uses the module-level ``punct`` and ``tabs`` patterns: matches of
    ``punct`` are deleted and matches of ``tabs`` become spaces.
    """
    normalized = text.strip().lower()
    for pattern, replacement in ((punct, ''), (tabs, ' ')):
        normalized = re.sub(pattern, replacement, normalized)
    return normalized.split()
words = preprocessing(text)
def make_freq(arr):
    """Return a dict mapping every element of ``arr`` to its number of occurrences."""
    counts = {}
    for item in arr:
        counts[item] = counts.get(item, 0) + 1
    return counts
word_freq = make_freq(words)
def make_bigrams(arr):
    """Return the space-joined pairs of neighbouring items of ``arr``.

    The range is taken from ``arr`` itself and stops one before the end, so
    the last item is never paired with a missing successor.
    """
    return [arr[i] + ' ' + arr[i + 1] for i in range(len(arr) - 1)]
bigrams = make_bigrams(words)
bigrams_freq = make_freq(bigrams)
from math import log
def count_pmi(x, y):
    """Return the pointwise mutual information of the bigram ``x y``.

    Probabilities come from the module-level ``word_freq``/``words`` and
    ``bigrams_freq``/``bigrams``.  PMI is ``log(p(x y) / (p(x) * p(y)))``.
    Returns 0 when any of the probabilities is zero (unknown word or
    bigram, or an empty corpus), since the logarithm is undefined there.
    """
    total_words = len(words)
    total_bigrams = len(bigrams)
    if total_words == 0 or total_bigrams == 0:
        return 0
    p_x = word_freq.get(x, 0) / total_words
    p_y = word_freq.get(y, 0) / total_words
    p_xy = bigrams_freq.get(x + ' ' + y, 0) / total_bigrams
    # log(0) raises ValueError and a zero denominator ZeroDivisionError,
    # so both cases are ruled out before computing
    if p_x == 0 or p_y == 0 or p_xy == 0:
        return 0
    return log(p_xy / (p_x * p_y))
def calculate_pmi():
    """Return a dict mapping every bigram in the global ``bigrams`` to its PMI."""
    scores = {}
    for pair in bigrams:
        first, second = pair.split()
        scores[pair] = count_pmi(first, second)
    return scores
pmi = calculate_pmi()
i = 0
for el in sorted(pmi, key = lambda m: -pmi[m]):
if i > 100:
break
print(el, pmi[el])
i += 1
import os
# Collect the text of every file under 'texts' into one string per category.
# A directory belongs to a category when the marker occurs in its path; the
# chunks are joined once at the end instead of growing a string per file.
corpus_chunks = {'anekdots': [], 'teh_mol': [], 'izvest': []}
for root, dirs, files in os.walk('texts'):
    for marker, chunks in corpus_chunks.items():
        if marker not in root:
            continue
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='utf-8') as f1:
                chunks.append(f1.read())
corpus_anek = ''.join(corpus_chunks['anekdots'])
corpus_teh = ''.join(corpus_chunks['teh_mol'])
corpus_izvest = ''.join(corpus_chunks['izvest'])
print(corpus_teh[:100])
words_anek = preprocessing(corpus_anek)
words_teh = preprocessing(corpus_teh)
words_izvest = preprocessing(corpus_izvest)
words_all = words_anek + words_teh + words_izvest
freq_anek = make_freq(words_anek)
freq_teh = make_freq(words_teh)
freq_izvest = make_freq(words_izvest)
freq_all = make_freq(words_all)
def count_pmi_cats(word, category):
    """Return the PMI between ``word`` and a text category.

    ``category`` is 'anek', 'izvest' or 'teh'.  Word probabilities come
    from the module-level frequency dicts; every category is given the
    prior 1/3.

    Raises:
        KeyError: ``word`` is absent from the whole corpus or from the
            chosen category.
        ValueError: ``category`` is not one of the three known names.
    """
    p_word = freq_all[word] / len(words_all)
    p_cat = 1 / 3
    per_category = {
        'anek': (freq_anek, len(words_anek)),
        'izvest': (freq_izvest, len(words_izvest)),
        'teh': (freq_teh, len(words_teh)),
    }
    if category not in per_category:
        raise ValueError('unknown category: {!r}'.format(category))
    d, w = per_category[category]
    p_word_cat = d[word] / w
    return log(p_word_cat / (p_word * p_cat))
# For the first 101 tokens of ``words`` print the category with the highest
# PMI.  The counter is reset here: the earlier top-N loop leaves ``i`` above
# the limit, which would end this loop before the first word.
i = 0
for w in words:
    if i > 100:
        break
    try:
        pmi_anek = count_pmi_cats(w, 'anek')
        pmi_izvest = count_pmi_cats(w, 'izvest')
        pmi_teh = count_pmi_cats(w, 'teh')
    except KeyError:
        # the word is missing from at least one corpus: nothing to compare
        pass
    else:
        max_pmi = max(pmi_anek, pmi_izvest, pmi_teh)
        if max_pmi == pmi_anek:
            print(w, 'anek')
        elif max_pmi == pmi_izvest:
            print(w, 'izvest')
        elif max_pmi == pmi_teh:
            print(w, 'teh')
    i += 1
import os
import re
from math import log
punct = '[.,!«»?&@"$\[\]\(\):;%
tabs = '[\t\n]'
def preprocessing(text):
    """Return the lower-cased words of ``text`` with punctuation removed.

    Matches of the module-level ``punct`` pattern are deleted and matches
    of ``tabs`` are turned into spaces before splitting on whitespace.
    """
    cleaned = text.lower()
    for pattern, replacement in ((punct, ''), (tabs, ' ')):
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip().split()
def count_tf(word, text):
    """Return the share of items of ``text`` that are equal to ``word``."""
    return text.count(word) / len(text)
def count_df(word, texts):
    """Return how many members of ``texts`` contain ``word``."""
    total = 0
    for text in texts:
        if word in text:
            total += 1
    return total
def count_idf(word, texts):
    """Return ``len(texts) / df(word)``, or 0 when no text contains ``word``."""
    df = count_df(word, texts)
    if df == 0:
        return 0
    return len(texts) / df
def count_tfidf(word, text, texts):
    """Return the TF-IDF weight of ``word`` in ``text`` relative to ``texts``.

    The weight is ``tf * log10(N / df)``: the term frequency is used as is
    and only the inverse document frequency is put on a log scale.  A word
    found in no document gets weight 0.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    if idf <= 0:
        # count_idf returns 0 for unseen words; log(0) is undefined
        return 0.0
    return tf * log(idf, 10)


def keywords(text, texts, top_n=6):
    """Return the ``top_n`` words of ``text`` with the highest TF-IDF weight.

    Args:
        text: list of words of the document being described.
        texts: list of word lists, one per document of the collection.
        top_n: how many keywords to return (6 keeps the original amount).

    Returns:
        dict mapping keyword to weight, ordered from best to worst.
    """
    dic_tfidf = {}
    for word in text:
        if word not in dic_tfidf:
            dic_tfidf[word] = count_tfidf(word, text, texts)
    # highest weight first: those words are frequent here and rare elsewhere
    best = sorted(dic_tfidf, key=dic_tfidf.get, reverse=True)[:top_n]
    return {word: dic_tfidf[word] for word in best}
def main():
    """Print keywords for every file found under the 'wikipedia' directory.

    Files are keyed by bare file name, so files with equal names in
    different sub-directories overwrite each other.
    """
    texts = {}
    for root, dirs, files in os.walk('wikipedia'):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='utf-8') as t:
                content = t.read()
            texts[f] = preprocessing(content)
    raw_texts = list(texts.values())
    for name, tokens in texts.items():
        print('\nИзвлекаем ключевые слова для текста {}'.format(name))
        for key, weight in keywords(tokens, raw_texts).items():
            print(key, weight)
if __name__ == '__main__':
main()
# Ask for three integers and report whether a + b == c and whether c solves
# the linear equation ax + b = 0.
print("Здравствуйте!")
a = int(input("Введите число a: "))
b = int(input("Введите число b: "))
c = int(input("Введите число c: "))
if a + b == c:
    print("Числа a и b в сумме дают число c")
else:
    print("Числа a и b в сумме НЕ дают число c")
# Substituting x = c avoids dividing by a (which may be 0) and keeps the
# comparison in exact integer arithmetic.
if a * c + b == 0:
    print("Число c является решением линейного уравнения ax + b = 0")
else:
    print("Число c НЕ является решением линейного уравнения ax + b = 0")
import re
import os
def folders():
    """Return, as a string, the number of sub-directories of the current
    directory whose name contains a digit."""
    numbers = '[0-9]'
    matching = [
        entry for entry in os.listdir('.')
        if os.path.isdir(entry) and re.search(numbers, entry)
    ]
    return str(len(matching))
def names():
    """Print the distinct entry names of the current directory.

    For directories everything from the first '.' that is followed by at
    least one character is cut off before comparing; a name that becomes
    empty is printed as 'None'.
    NOTE(review): the nesting was reconstructed from unindented source —
    confirm that the cut is meant for directories rather than files.
    """
    print('Все файлы и(или) папки в текущей папке: ')
    res = r'\..+'
    unique = []
    for entry in os.listdir('.'):
        shown = re.sub(res, '', entry) if os.path.isdir(entry) else entry
        if shown not in unique:
            unique.append(shown)
    for shown in unique:
        print(shown + '\n' if shown else 'None')
print('Количество папок с цифрами в названии в текущей папке: ' + folders())
names()
def open_read():
    """Return the number of lines in 'F.xml' (UTF-8)."""
    with open('F.xml', 'r', encoding='utf-8') as f:
        return len(f.readlines())
def write_doc(num):
    """Write ``num`` as text to 'Number.txt' and report it on stdout."""
    numlines = str(num)
    with open('Number.txt', 'w', encoding='utf-8') as new_doc:
        new_doc.write(numlines)
    message = '\n'.join(('Количество строк: ' + numlines,
                         'Создан документ Number.txt'))
    print(message)
def main():
write_doc(open_read())
main()
import os
a = {}
def dict_new():
for root, dirs, files in os.walk('.\\news'):
for file in files:
with open (os.path.join(root, file), 'r', encoding = 'cp1251') as page:
raw_text = page.read()
a[file] = raw_text.count('(.*?)', text, flags = re.DOTALL)
cap = cap.group(3)
return cap
def write_doc(cap):
    """Write the string ``cap`` to 'Capital.txt' and report it on stdout."""
    with open('Capital.txt', 'w', encoding='utf-8') as new_doc:
        new_doc.write(cap)
    message = '\n'.join(('Столица данной страны: ' + cap,
                         'Создан документ Capital.txt'))
    print(message)
def main():
write_doc(capital(open_read()))
main()
# Read words until an empty line; print each one reversed with every third
# character (counted in the reversed word) removed.
text = []
word = input()
while word:
    text.append(word)
    word = input()
for entry in text:
    flipped = entry[::-1]
    wrd = ''.join(ch for pos, ch in enumerate(flipped, start=1) if pos % 3 != 0)
    print(wrd)
# Print the text and the average number of words per line.
with open('Master and Margarita.txt', 'r', encoding='utf-8') as MM:
    lines = MM.readlines()
print(' ', *lines)
# split() counts real words: blank lines give 0 and repeated spaces do not
# inflate the count, unlike counting space characters
tablewords = [len(line.split()) for line in lines]
number = sum(tablewords)
# an empty file has no lines to average over
averword = number / len(lines) if lines else 0
print('\n', 'Среднее количество слов в строке =', averword)
# Print the word and then each shorter prefix, down to the empty string.
word = input('Введите слово: ')
print(word)
for cut in range(len(word) - 1, -1, -1):
    print(word[:cut])
import random
with open('allwords.txt', 'r', encoding = 'utf-8') as aw:
lines = aw.readlines()
def _pick_word(line_index):
    """Return a random word from line ``line_index`` of the global ``lines``.

    ``lines`` holds the lines of 'allwords.txt'.  The line is split on any
    whitespace so the trailing newline never ends up inside a word; an
    empty line yields ''.
    """
    candidates = lines[line_index].split()
    return random.choice(candidates) if candidates else ''


# Every function below returns one random word from a fixed line of the word
# list.  NOTE(review): the name suffix presumably encodes gender (m/f/mid)
# and syllable count (1-6) — confirm against 'allwords.txt'.

# Nouns: lines 1-17.
def noun_m1():
    return _pick_word(1)


def noun_f1():
    return _pick_word(2)


def noun_m2():
    return _pick_word(3)


def noun_f2():
    return _pick_word(4)


def noun_mid2():
    return _pick_word(5)


def noun_m3():
    return _pick_word(6)


def noun_f3():
    return _pick_word(7)


def noun_mid3():
    return _pick_word(8)


def noun_m4():
    return _pick_word(9)


def noun_f4():
    return _pick_word(10)


def noun_mid4():
    return _pick_word(11)


def noun_m5():
    return _pick_word(12)


def noun_f5():
    return _pick_word(13)


def noun_mid5():
    return _pick_word(14)


def noun_m6():
    return _pick_word(15)


def noun_f6():
    return _pick_word(16)


def noun_mid6():
    return _pick_word(17)


# Verbs: lines 20-25.
def verb_1():
    return _pick_word(20)


def verb_2():
    return _pick_word(21)


def verb_3():
    return _pick_word(22)


def verb_4():
    return _pick_word(23)


def verb_5():
    return _pick_word(24)


def verb_6():
    return _pick_word(25)


# Conjunctions: lines 28-29.
def conj_1():
    return _pick_word(28)


def conj_2():
    return _pick_word(29)


# Adjectives: lines 32-46.
def adj_m1():
    return _pick_word(32)


def adj_m2():
    return _pick_word(33)


def adj_f2():
    return _pick_word(34)


def adj_m3():
    return _pick_word(35)


def adj_f3():
    return _pick_word(36)


def adj_mid3():
    return _pick_word(37)


def adj_m4():
    return _pick_word(38)


def adj_f4():
    return _pick_word(39)


def adj_mid4():
    return _pick_word(40)


def adj_m5():
    return _pick_word(41)


def adj_f5():
    return _pick_word(42)


def adj_mid5():
    return _pick_word(43)


def adj_m6():
    return _pick_word(44)


def adj_f6():
    return _pick_word(45)


def adj_mid6():
    return _pick_word(46)


# Adverbs: lines 49-53.
def adv_2():
    return _pick_word(49)


def adv_3():
    return _pick_word(50)


def adv_4():
    return _pick_word(51)


def adv_5():
    return _pick_word(52)


def adv_6():
    return _pick_word(53)


# Numerals: lines 56-69.
def numeral_m2():
    return _pick_word(56)


def numeral_f2():
    return _pick_word(57)


def numeral_mid2():
    return _pick_word(58)


def numeral_m3():
    return _pick_word(59)


def numeral_f3():
    return _pick_word(60)


def numeral_mid3():
    return _pick_word(61)


def numeral_m4():
    return _pick_word(62)


def numeral_f4():
    return _pick_word(63)


def numeral_mid4():
    return _pick_word(64)


def numeral_m5():
    return _pick_word(65)


def numeral_f5():
    return _pick_word(66)


# This picker was previously defined under the name ``numeral_mid2`` and so
# silently replaced the line-58 picker above.
def numeral_mid5():
    return _pick_word(67)


def numeral_f6():
    return _pick_word(68)


def numeral_mid6():
    return _pick_word(69)
def row_1_5():
    """Return a random phrase for a five-syllable line (noun-phrase patterns).

    A pattern (tuple of word pickers) is chosen first and only its pickers
    are called, instead of building every candidate phrase up front.
    """
    patterns = [
        (adj_m1, noun_m4), (adj_m2, noun_m3), (adj_m3, noun_m2), (adj_m4, noun_m1),
        (numeral_m2, noun_m1, verb_2), (numeral_m2, noun_m2, verb_1),
        (numeral_m2, noun_m3), (numeral_m3, noun_m1, verb_1), (numeral_m3, noun_m2),
        (adj_f2, noun_f3), (adj_f3, noun_f2), (adj_f4, noun_f1),
        (numeral_f2, noun_f1, verb_2), (numeral_f2, noun_f2, verb_2), (numeral_f2, noun_f3),
        (numeral_f3, noun_f1, verb_1), (numeral_f3, noun_f2),
        (numeral_mid2, verb_2), (numeral_mid2, noun_mid2, verb_1),
        (numeral_mid2, noun_mid3),
        (numeral_mid3, verb_1), (numeral_mid3, noun_mid2),
        (noun_m5,), (noun_f5,), (noun_mid5,),
    ]
    return ' '.join(picker() for picker in random.choice(patterns))
def row_1_7():
    """Return a random phrase for a seven-syllable line (adverb patterns).

    Only the pickers of the chosen pattern are called.
    """
    patterns = [
        (adv_2, verb_5), (adv_3, verb_4), (adv_4, verb_3), (adv_5, verb_2), (adv_6, verb_1),
        (adv_2, verb_4, conj_1), (adv_2, verb_3, conj_2),
        (adv_3, verb_3, conj_1), (adv_3, verb_2, conj_2),
        (adv_4, verb_2, conj_1), (adv_4, verb_1, conj_2),
        (adv_5, verb_1, conj_1), (adv_5, conj_2),
        (adv_6, conj_1),
    ]
    return ' '.join(picker() for picker in random.choice(patterns))
def row_2_5():
    """Return a random verb + noun phrase for a five-syllable line.

    Only the pickers of the chosen pattern are called.
    """
    patterns = [
        (verb_1, noun_m4), (verb_2, noun_m3), (verb_3, noun_m2), (verb_4, noun_m1),
        (verb_1, noun_f4), (verb_2, noun_f3), (verb_3, noun_f2), (verb_4, noun_f1),
        (verb_1, noun_mid4), (verb_2, noun_mid3), (verb_3, noun_mid2),
    ]
    return ' '.join(picker() for picker in random.choice(patterns))
def row_2_7():
    """Return a random noun + verb phrase for a seven-syllable line.

    Only the pickers of the chosen pattern are called.
    """
    patterns = [
        (noun_m1, verb_6), (noun_m2, verb_5), (noun_m3, verb_4), (noun_m4, verb_3),
        (noun_m5, verb_2),
        (noun_m6, verb_1),
        (noun_f1, verb_6), (noun_f2, verb_5), (noun_f3, verb_4), (noun_f4, verb_3),
        (noun_f5, verb_2),
        (noun_f6, verb_1), (noun_mid2, verb_5), (noun_mid3, verb_4), (noun_mid4, verb_3),
        (noun_mid5, verb_2),
        (noun_mid6, verb_1),
    ]
    return ' '.join(picker() for picker in random.choice(patterns))
def row_3_5():
    """Return a single random word (``verb_5`` or ``adv_5``) as a whole line.

    The picker is chosen first, so only one word is drawn.
    """
    picker = random.choice((verb_5, adv_5))
    return picker()
def haiku():
    """Return a random three-line poem, lines separated by newlines.

    One of three layouts of row builders is chosen and only its three
    rows are generated.
    """
    layouts = [
        (row_2_5, row_2_7, row_1_5),
        (row_3_5, row_2_7, row_3_5),
        (row_1_5, row_1_7, row_3_5),
    ]
    return '\n'.join(build_row() for build_row in random.choice(layouts))
print(haiku())
# Print the word, then keep trimming one character from both ends and
# printing the rest; the first empty slice is printed too and ends the loop.
word = input ('give a word')
z = 0
lenghth = len(word)
while True:
    newword = word[z:lenghth]
    print(newword)
    z += 1
    lenghth -= 1
    if newword == '':
        break
import re
def sentences():
    """Split 'text.txt' into sentences and return them as lists of words.

    A sentence is a shortest run of characters ending in '.', '!' or '?'.
    Each word is stripped of surrounding punctuation and lower-cased.
    """
    with open('text.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    raw_sentences = re.findall('[^.!?]{1,}?[.?!]', text)
    return [
        [token.strip('!?.,;:"').lower() for token in raw.split()]
        for raw in raw_sentences
    ]
def output(m):
    """Print, for every sentence, the words that occur in it more than once.

    Args:
        m: list of sentences, each a list of words.

    Each repeated word is printed once with its count, centred in a column
    as wide as the longest word of the whole input.
    """
    # default=0 keeps an input without any words from raising ValueError
    maxi = max((len(word) for sentence in m for word in sentence), default=0)
    for sentence_number, sentence in enumerate(m, start=1):
        print ('предложение №', sentence_number)
        words = []
        for word in sentence:
            if word in words:
                continue
            words.append(word)
            # count over the whole sentence, including its last word
            j = sentence.count(word)
            if j > 1:
                print('{:^{maxi}} {:^2}'.format(word, j, maxi = maxi))
output(sentences())
import csv
import random
def open_file():
    """Return the rows of 'some.csv' as a list of lists of strings."""
    with open('some.csv', 'r') as f:
        return list(csv.reader(f))
def dictionary(a):
    """Map the first five cells of row 0 of ``a`` to the matching cells of row 1."""
    keys, values = a[0], a[1]
    return {keys[col]: values[col] for col in range(5)}
def answer(d,a):
    """Run the guessing game and return a random phrase from ``a[2]``.

    A random value of ``d`` is the secret word; every key mapped to it is
    printed as a hint.  The user is asked until the word is typed exactly;
    each wrong guess prints a random phrase from ``a[3]``.
    """
    word = random.choice(list(d.values()))
    for key, value in d.items():
        if value == word:
            print('твоя подсказка:',key)
    ans = input('введи слово')
    while ans != word:
        print(random.choice(a[3]))
        ans = input('введи слово')
    return random.choice(a[2])
print('мы загадали слово для тебя')
print(answer(dictionary(open_file()),open_file()))
import re
def open_text():
    """Return the distinct lower-cased words of 'text.txt' in order of first
    occurrence, each stripped of surrounding '.,?!-' characters."""
    with open('text.txt', 'r', encoding ='utf-8') as f:
        tokens = f.read().lower().split()
    stripped = (token.strip('.,?!-') for token in tokens)
    # dict keys keep insertion order and drop repeats
    return list(dict.fromkeys(stripped))
def answer(words):
    """Print every item of ``words`` that begins with a form of "сидеть".

    The pattern is applied with ``match``, i.e. anchored at the start of
    the item.  Nothing is returned.
    """
    pattern = re.compile(r'\bси(д(и(шь|те?|м)?|е(л(о|а|и)?|в(ш(и(й|ми?|е|х)?|е(го|му?|е|й|ю)|ая|ую))?|ть)|я(т|щ(и(й|ми?|е|х)|е(го|му?|е|й|ю)|ая|ую))?)|жу)\b')
    for item in words:
        if pattern.match(item) is not None:
            print(item)
sit = answer(open_text())
# Report the percentage of lines in 'newy.txt' that have more than 5 words.
quantity = 0
percent = 0
with open('newy.txt','r',encoding ='utf-8') as f:
    for line in f:
        quantity += 1
        if len(line.split()) > 5:
            percent += 1
a = []
if percent == 0 or quantity == 0:
    print(' no lines like this')
else:
    print ('the number of lines:', percent / quantity * 100)
import os
def walking():
    """Print every directory under '.' that directly holds the largest
    number of files."""
    counts = {}
    for root, dirs, files in os.walk('.'):
        counts[root] = len(files)
    maxi = max(counts.values())
    for folder, amount in counts.items():
        if amount == maxi:
            print ('there are',maxi,'files in',folder)
walking()
import re
def open():
    """Return the content of 'ptitsi.html' (UTF-8) as one string.

    NOTE: this function shadows the builtin ``open``; the file is therefore
    opened through ``io.open`` (the same function as the builtin), because
    calling ``open(...)`` here would re-enter this zero-argument function.
    """
    import io
    with io.open('ptitsi.html', 'r', encoding = 'utf-8') as f:
        content = f.read()
    return content
def substitute(content):
    """Strip tags from ``content`` and turn birds into fish.

    Steps, in order: remove ``<...>`` spans (also across lines), delete
    runs of two or more newline/space characters, then replace forms of
    "птиц-" by "рыб-" keeping the matched ending (lower-case and
    capitalised separately).
    """
    steps = (
        ('<.*?>', '', re.DOTALL),
        (r'(\n| ){2,}', '', re.DOTALL),
        ('птиц(а(ми?|х)|ы|е(й|ю)?|у)?', 'рыб\\1', 0),
        ('Птиц(а(ми?|х)|ы|е(й|ю)?|у)?', 'Рыб\\1', 0),
    )
    for pattern, replacement, flags in steps:
        content = re.sub(pattern, replacement, content, flags = flags)
    return content
def write(content):
    """Save ``content`` to 'text.txt' (UTF-8), replacing any previous file."""
    # the module-level ``open`` of this script is a user function without
    # parameters, so the real file opener is taken from ``io``
    import io
    with io.open('text.txt','w', encoding = 'utf-8') as f:
        f.write(content)
# Run the pipeline: read the page, clean and substitute, save the result.
# ``write`` returns nothing, so its result is not printed.
write(substitute(open()))
import random
def _stripped_lines(path):
    """Return the lines of the UTF-8 file ``path`` with surrounding whitespace removed."""
    with open(path, 'r',encoding = 'utf-8') as f:
        return [line.strip() for line in f]


def imperative():
    """Return a random line of 'imperatives.txt'."""
    return random.choice(_stripped_lines('imperatives.txt'))


def noun_acc():
    """Return a random line of 'nouns_Acc_Sg&Pl.txt'."""
    return random.choice(_stripped_lines('nouns_Acc_Sg&Pl.txt'))


def ins_phrase():
    """Return a random line of 'clitics_Ins.txt' followed by a random line
    of 'nouns_Ins.txt', separated by a space."""
    clitics = _stripped_lines('clitics_Ins.txt')
    noun_inss = _stripped_lines('nouns_Ins.txt')
    return random.choice(clitics) + ' ' + random.choice(noun_inss)


def noun_pl():
    """Return a random line of 'nouns_ Nom=Acc_Pl.txt'."""
    return random.choice(_stripped_lines('nouns_ Nom=Acc_Pl.txt'))


def noun_sg():
    """Return a random line of 'nouns_Nom=Acc_Sg.txt'."""
    return random.choice(_stripped_lines('nouns_Nom=Acc_Sg.txt'))


def verb():
    """Return a random line of 'verbs_Pl.txt'."""
    return random.choice(_stripped_lines('verbs_Pl.txt'))


def adverb():
    """Return a random line of 'adverbs.txt'."""
    return random.choice(_stripped_lines('adverbs.txt'))
def punctuation():
    """Return a random closing mark: '.', '!' or '...'."""
    return random.choice(['.', '!', '...'])
# Sentence templates.  Every template joins randomly picked parts with single
# spaces and appends a random closing mark.

def type1():
    """imperative + accusative noun."""
    return ' '.join((imperative(), noun_acc())) + punctuation()


def type2():
    """plural noun + verb."""
    return ' '.join((noun_pl(), verb())) + punctuation()


def type3():
    """imperative + instrumental phrase."""
    return ' '.join((imperative(), ins_phrase())) + punctuation()


def type4():
    """plural noun + verb + plural noun."""
    return ' '.join((noun_pl(), verb(), noun_pl())) + punctuation()


def type5():
    """plural noun + verb + singular noun."""
    return ' '.join((noun_pl(), verb(), noun_sg())) + punctuation()


def type6():
    """instrumental phrase + imperative + singular noun."""
    return ' '.join((ins_phrase(), imperative(), noun_sg())) + punctuation()


def type7():
    """imperative + accusative noun + adverb."""
    return ' '.join((imperative(), noun_acc(), adverb())) + punctuation()
def tanka(i):
    """Return a random sentence for line number ``i`` of the poem.

    Lines 1 and 3 use one of the templates 1-3; every other line uses one
    of the templates 4-7.
    """
    if (i == 1) or (i == 3):
        builders = {1: type1, 2: type2, 3: type3}
        number = random.choice([1,2,3])
    else:
        builders = {4: type4, 5: type5, 6: type6, 7: type7}
        number = random.choice([4,5,6,7])
    return builders[number]()
def printing():
    """Print the five lines of one poem, numbered 1 to 5."""
    for line_number in (1, 2, 3, 4, 5):
        print(tanka(line_number))
a = printing()
def open_text(text):
    """Return the lower-cased whitespace-separated words of the UTF-8 file ``text``."""
    with open(text, 'r', encoding ='utf-8') as f:
        return f.read().lower().split()
def percent(words, number):
    """Return the share of 'un'-words longer than ``number`` characters.

    Counts the words that start with 'un', prints that count and returns
    the rounded percentage of them whose length exceeds ``number`` as a
    string ending in '%'.  Without any 'un'-word a notice is returned.
    """
    prefixed = [item for item in words if item[0:2] =='un']
    if not prefixed:
        return 'no matching words were found'
    longer = sum(1 for item in prefixed if len(item) > number)
    print('the number of words:', len(prefixed))
    return str(round(longer / len(prefixed) * 100)) + '%'
def questions():
    """Ask for a file name and a length and return the ``percent`` result
    for the words of that file."""
    file_name = input(' Please, enter the name of the text')
    threshold = int(input(' Please, enter the lenght'))
    return percent(open_text(file_name), threshold)
print('your result is', questions())
# Print the even powers of two (2, 4, 8, ...) that do not exceed n.
n = int(input( ))
i = 0
w = 0
while w <= n:
    w = 2**i
    i += 1
    is_even = w % 2 == 0
    if is_even and w <= n:
        print (w)
import re
def open_text():
    """Return the content of 'archi.html' (UTF-8) as one string."""
    with open('archi.html','r', encoding = 'utf-8') as f:
        return f.read()
def search(text):
    """Return the three digits of the first 'title="ISO ddd"' attribute that
    follows 'title="Коды языков"' in ``text``.

    Raises AttributeError when the pattern is not found.
    """
    pattern = re.compile(r'title="Коды языков".*?title="ISO (\d\d\d)"', flags = re.DOTALL)
    found = pattern.search(text)
    return found.group(1)
def write(z):
    """Store the string ``z`` in 'archi.txt' (UTF-8), overwriting the file."""
    with open('archi.txt', mode='w', encoding='utf-8') as target:
        target.write(z)
archi = write(search(open_text()))
import os
import re
def search():
    """Report the sub-directories of the current directory whose names mix
    Latin and Cyrillic letters.

    Prints the number of such directories and, if there are any, their
    names separated by ', '.
    """
    a = []
    for f in os.listdir():
        if not os.path.isdir(f):
            continue
        name = str(f)
        # [a-zA-Z], not [a-zA-z]: the latter also matches [ \ ] ^ _ and `
        lat = re.search('[a-zA-Z]', name)
        rus = re.search('[а-яА-ЯЁё]', name)
        if lat is not None and rus is not None:
            a.append(f)
    count = len(a)
    if count == 1:
        print('1 dir was found', end = '')
    else:
        print (count, 'dirs were found ', end ='')
    if a:
        print( ':'+', '.join(a))
search()
# Ask for eight words and print them glued together in four pairs.
arr = []
for pair_number in range(4):
    first = input('vvedi slovo')
    second = input('vvedi slovo')
    arr.append(first + second)
for joined in arr:
    print(joined)
# Ask for three integers and report whether a / b == c and whether a ** b == c.
a = int(input('введи а'))
b = int(input('введи b'))
c = int(input('введи с'))
# a / b == c  <=>  a == b * c for b != 0; the integer form cannot divide by
# zero and is exact
if b != 0 and a == b * c:
    print('а разделить на b равно с')
else:
    print('а разделить на b не равно с')
try:
    power_matches = a ** b == c
except ZeroDivisionError:
    # 0 raised to a negative power is undefined
    power_matches = False
if power_matches:
    print(' а в степени b равно c')
else:
    print(' а в степени b не равно с')
# Print the percentage of lines in 'hw5.txt' that contain more than 5 words.
with open ('hw5.txt', 'r', encoding = 'utf-8') as f:
    lines = f.readlines ()
list_1 = [len(line.split()) for line in lines]
sum_list = len(list_1)
sum_line = sum(1 for elem in list_1 if elem > 5)
# an empty file has no lines, so report 0 instead of dividing by zero
percent = (sum_line / sum_list) * 100 if sum_list else 0
print (percent, '% строк содержит больше 5 слов')
import re
def opentext(text):
    """Read the UTF-8 file ``text`` and return its pieces between sentence
    terminators.

    '.', '...' and '?' are first turned into '!', then the content is split
    on '!'; the pieces keep their inner whitespace.
    """
    with open(text, 'r', encoding = 'utf-8') as f:
        sentences = f.read()
    terminator = re.compile(r'\.(\.\.)?|\?')
    return terminator.sub('!', sentences).split('!')
def text_format(text):
text = opentext(text)
text1 = [re.sub('( - )|( — )|( ‒ )', ' ', i) for i in text]
sents = [sent.split() for sent in text1]
sents2 = [[i.strip('.,?!":;
sents3 = [[i.lower() for i in sent] for sent in sents2]
return sents3
def search(text):
    """Return, for every sentence of the file ``text``, the list of its words
    that occur in that sentence more than once (repeats included)."""
    repeated = []
    for sent in text_format(text):
        repeated.append([w for w in sent if sent.count(w) > 1])
    return repeated
def count(text):
    """Print each sentence of the file ``text`` that has repeated words,
    followed by a two-column table of those words and their counts."""
    a = search(text)
    b = opentext(text)
    for position, repeated in enumerate(a):
        if not repeated:
            continue
        print (str(b[position]) + '\n')
        c = {w : repeated.count(w) for w in repeated}
        for key, amount in c.items():
            print ('{:^10}'.format(key) + '{:^10}'.format(amount))
text = input('Введите название файла: ')
count(text)
import re
def opentext(text):
with open(text, 'r', encoding = 'utf-8') as f:
text = f.readlines()
list_ = []
for line in text:
line = line.split()
list_.extend(line)
words = []
for i in range(len(list_)):
a = list_[i]
a = a.strip('.,?!"":;*()%$
words.append(a)
return words
def find_form():
    """Return the distinct forms of "сидеть" found in the file named by the
    global ``text``.

    Words are taken from ``opentext(text)``.  The infinitive preceded by a
    form of "быть" (future tense) is reported as one two-word form.  The
    result keeps the order of first occurrence.
    """
    form = 'си((жу)|д((и((шь)|м|(те?))?)|(е((ть)|(л(а|и|о)?)|(в(ш((и(й|е|х|(ми?))?)|(е((го)|(му?)|й|е)?)|(ая)|(ую))))))|(я(щ((и(й|(ми?)|х|е))|(е((го)|(му?)|й|е))|(ая)|(ую)))?)))'
    form2 = 'буд((ут?)|(е(м|(шь)|(те?))))'
    words = opentext(text)
    forms = []
    for i, word in enumerate(words):
        if re.search(form, word) is None:
            continue
        # i > 0: the first word has no predecessor (index -1 would silently
        # look at the last word of the text)
        if word == 'сидеть' and i > 0 and re.search(form2, words[i-1]) is not None:
            candidate = words[i-1] + ' ' + word
        else:
            candidate = word
        if candidate not in forms:
            forms.append(candidate)
    return forms
text = input('Введите название файла: ')
m = find_form()
print ('Формы глагола "сидеть", встретившиеся в тексте:')
for i in range(len(m)):
print (m[i], end = '\n')
# Read eight strings and print them concatenated in consecutive pairs.
l = [input() for _ in range(8)]
for start in range(0, 8, 2):
    print (l[start]+l[start + 1])
import os
import re
def list_files(path):
    """Return the paths of all files found under ``path`` (recursively)."""
    return [
        os.path.join(folder, file_name)
        for folder, dirs, files in os.walk(path)
        for file_name in files
    ]
def open_file(f):
    """Return the lines of the UTF-8 file ``f`` (line endings kept)."""
    with open(f, 'r', encoding = 'utf-8') as k:
        return k.readlines()
def count_sent(path):
    """Return a dict mapping a short file name to a per-file line count.

    The key is the part of the path matched by '(_.*?.xhtml)'; a path
    without such a part raises AttributeError.
    NOTE(review): the line filter is the empty pattern, which matches every
    line — the original marker text looks lost; confirm what was counted.
    """
    list_sent = {}
    for f in list_files(path):
        f_name = re.search('(_.*?.xhtml)', f).group(1)
        file_text = open_file(f)
        list_sent[f_name] = sum(
            1 for line in file_text if re.search('', line) is not None
        )
    return list_sent
def file_format_sent(path):
    """Write the ``count_sent(path)`` table to 'task1.txt', one
    tab-separated 'name<TAB>count' row per file."""
    sent = count_sent(path)
    rows = [key + '\t' + str(total) + '\n' for key, total in sent.items()]
    with open('task1.txt', 'w', encoding = 'utf-8') as k:
        k.writelines(rows)
def inf(f):
    """Return ``{author: topic}`` read from the meta lines of file ``f``.

    The last 'content="..." name="author"' and the last
    'content="..." name="topic"' attribute found are used.  If either one
    is missing an empty dict is returned.
    """
    author1 = None
    topic1 = None
    for line in open_file(f):
        author = re.search('content="(.*?)" name="author"', line)
        if author is not None:
            author1 = author.group(1)
        topic = re.search('content="(.*?)" name="topic"', line)
        if topic is not None:
            topic1 = topic.group(1)
    if author1 is None or topic1 is None:
        # nothing to report for files without both meta fields
        return {}
    return {author1: topic1}
def create_csv(path):
    """Write 'task2.csv': one tab-separated row 'file<TAB>author<TAB>topic'
    for every author/topic pair that ``inf`` finds in the files under ``path``."""
    files = list_files(path)
    with open('task2.csv', 'w', encoding = 'utf-8') as k:
        for f in files:
            infa = inf(f)
            f_name = re.search('(_.*?.xhtml)', f).group(1)
            for author, topic in infa.items():
                k.write('\t'.join((str(f_name), str(author), str(topic))) + '\n')
def pr_loc(f):
    """Return 'preposition noun' bigrams found on neighbouring lines of ``f``.

    A line containing 'gr="PR"' followed by a line matching '"S.*?loc'
    yields one bigram built from the text captured on each of the two lines.
    NOTE(review): the capture pattern '(.*?)' always captures an empty
    string — the surrounding markup looks lost; confirm the intended pattern.
    """
    text = open_file(f)
    bigrams = []
    # stop one line early: the last line has no following line to inspect
    for i in range(len(text) - 1):
        if re.search('gr="PR"', text[i]) is None:
            continue
        prep = re.search('(.*?)', text[i]).group(1)
        if re.search('"S.*?loc', text[i+1]) is not None:
            S_loc = re.search('(.*?)', text[i+1]).group(1)
            bigrams.append(prep + ' ' + S_loc)
    return bigrams
def text_without_tegs(f):
    """Return the text of file ``f`` rebuilt from per-line captures.

    For every line the text captured by '(.*?)' is appended after a space,
    followed by the character captured by '(.)()?' when there is one; a
    newline is added after '.', '!' or '?'.
    NOTE(review): both patterns look like remnants of stripped markup —
    confirm what they were meant to capture.
    """
    pieces = []
    for line in open_file(f):
        if re.search('', line) is None:
            continue
        word = re.search('(.*?)', line).group(1)
        prep = re.search('(.)()?', line)
        if prep is None:
            pieces.append(' ' + word)
        elif prep.group(1) in ('.', '!', '?'):
            pieces.append(' ' + word + prep.group(1) + '\n')
        else:
            pieces.append(' ' + word + prep.group(1))
    return ''.join(pieces)
def bigr(path):
    """Write every bigram that ``pr_loc`` finds in the files under ``path``
    to 'task3.txt', one per line."""
    files = list_files(path)
    with open('task3.txt', 'w', encoding = 'utf-8') as k:
        for f in files:
            k.writelines(b + '\n' for b in pr_loc(f))
path = 'C:\\Users\\1\\Documents\\ниу вшэ\\КИЛИ и программирование\\python\\экзамен\\news'
file_format_sent(path)
create_csv(path)
bigr(path)
import random
def _read_words(path):
    """Return every whitespace-separated word of the UTF-8 file ``path``.

    All lines are used, so blank or trailing lines do not hide the words.
    """
    with open(path, 'r', encoding = 'utf-8') as f:
        return f.read().split()


def adjective_Abl_m():
    """Return a random word from 'adjective_Abl_verse1_m.txt'."""
    return random.choice(_read_words('adjective_Abl_verse1_m.txt'))


def adjective_Abl_f():
    """Return a random word from 'adjective_Abl_verse1_f.txt'."""
    return random.choice(_read_words('adjective_Abl_verse1_f.txt'))


def noun_Abl_m():
    """Return a random word from 'noun_Abl_verse1_m.txt'."""
    return random.choice(_read_words('noun_Abl_verse1_m.txt'))


def noun_Abl_f():
    """Return a random word from 'noun_Abl_verse1_f.txt'."""
    return random.choice(_read_words('noun_Abl_verse1_f.txt'))


def noun_phrase():
    """Return a capitalised preposition ('в', 'к' or 'с') plus a fitting noun.

    Nouns for 'в'/'к' come from 'noun_verse1_prep1.txt', nouns for 'с' from
    'noun_verse1_prep2.txt'.

    Raises:
        ValueError: 'prepositions.txt' contains none of the three
            prepositions (drawing again would never terminate).
    """
    candidates = [p for p in _read_words('prepositions.txt') if p in ('в', 'к', 'с')]
    if not candidates:
        raise ValueError("prepositions.txt has none of 'в', 'к', 'с'")
    prep = random.choice(candidates)
    if prep == 'в' or prep == 'к':
        noun_file = 'noun_verse1_prep1.txt'
    else:
        noun_file = 'noun_verse1_prep2.txt'
    noun1 = random.choice(_read_words(noun_file))
    return prep.title() + ' ' + noun1


def noun_Gen():
    """Return a random word from 'noun_Gen_verse1.txt'."""
    return random.choice(_read_words('noun_Gen_verse1.txt'))


def verse11():
    """Return 'Adjective noun' built from the masculine word lists."""
    return adjective_Abl_m().title() + ' ' + noun_Abl_m()


def verse12():
    """Return 'Adjective noun' built from the feminine word lists."""
    return adjective_Abl_f().title() + ' ' + noun_Abl_f()


def verse13():
    """Return a prepositional phrase followed by a noun from 'noun_Gen_verse1.txt'."""
    return noun_phrase() + ' ' + noun_Gen()
def _read_words(path):
    """Return every whitespace-separated word of the UTF-8 file ``path``.

    All lines are used, so blank or trailing lines do not hide the words.
    """
    with open(path, 'r', encoding = 'utf-8') as f:
        return f.read().split()


def participle_adj():
    """Return a random word from 'participle_adjective_verse2.txt'."""
    return random.choice(_read_words('participle_adjective_verse2.txt'))


def subject():
    """Return a random word from 'subject_verse2.txt'."""
    return random.choice(_read_words('subject_verse2.txt'))


def place():
    """Return a random entry of 'places_verse2.txt'.

    Entries are separated by ', ' and may consist of several words; they
    are stripped so a line break never ends up inside the result.
    """
    with open('places_verse2.txt', 'r', encoding = 'utf-8') as f:
        places = [item.strip() for line in f for item in line.split(', ')]
    places = [item for item in places if item]
    return random.choice(places)


def obj_f():
    """Return 'adjective object' drawn from the feminine word lists."""
    adj = random.choice(_read_words('adjective_obj_verse2_f.txt'))
    obj = random.choice(_read_words('object_verse2_f.txt'))
    return adj + ' ' + obj


def obj_m():
    """Return 'adjective dependent-noun object' drawn from the masculine
    word lists (the word from 'object_Gen_verse2_m.txt' goes in the middle)."""
    obj1 = random.choice(_read_words('object_verse2_m.txt'))
    obj2 = random.choice(_read_words('object_Gen_verse2_m.txt'))
    adj = random.choice(_read_words('adjective_obj_verse2_m.txt'))
    return adj + ' ' + obj2 + ' ' + obj1


def verse21():
    """Return 'Participle subject place.'."""
    return participle_adj().title() + ' ' + subject() + ' ' + place() + '.'


def verse22():
    """Return a capitalised verb followed by a feminine object phrase."""
    verb = random.choice(_read_words('verb_verse2.txt'))
    return verb.title() + ' ' + obj_f()


def verse23():
    """Return a capitalised verb followed by a masculine object phrase."""
    verb = random.choice(_read_words('verb_verse2.txt'))
    return verb.title() + ' ' + obj_m()
def verb_feel():
with open('verb_feelings.txt', 'r', encoding = 'utf-8') as f:
lines = f.readlines()
for line in lines:
line = line.split()
return random.choice(line)
def verse31():
with open('prepositions.txt', 'r', encoding = 'utf-8') as f:
lines = f.readlines()
for line in lines:
line = line.split()
prep = random.choice(line)
with open('base_noun_verse3.txt', 'r', encoding = 'utf-8') as k:
nouns = k.readlines()
for noun in nouns:
noun = noun.split()
base_noun = random.choice(noun)
if prep == 'под' or prep == 'над':
if base_noun == 'мор' or base_noun == 'солнц':
noun = base_noun + 'ем'
else:
noun = base_noun + 'ом'
elif prep == 'у' or prep == 'от' or prep == 'из':
if base_noun == 'мор':
noun = base_noun + 'я'
else:
noun = base_noun + 'а'
elif prep == 'при' or prep == 'на':
noun = base_noun + 'е'
elif prep == 'с':
if base_noun == 'мор' or base_noun == 'солнц':
noun = base_noun + 'ем'
else:
noun = base_noun + 'ом'
prep = 'как с'
elif prep == 'в':
noun = base_noun + 'е'
prep = 'как в'
elif prep == 'к':
if base_noun == 'мор':
noun = base_noun + 'ю'
else:
noun = base_noun + 'у'
prep = 'как к'
else:
if base_noun == 'мор':
noun = base_noun + 'ю'
else:
noun = base_noun + 'у'
return verb_feel().title() + ',' + ' ' + prep + ' ' + noun
def verse32():
    """Return a verse-3 line: "<Participle> <subject>.".

    The participle is drawn from ``participle_verse3.txt`` and the subject
    from ``subject_verse3.txt``.  All words of each file are candidates, so
    blank or additional lines do not break the draw.

    Raises:
        IndexError: if either file contains no words.
    """
    def read_words(path):
        # Every whitespace-separated token of the file is a candidate.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read().split()

    participle = random.choice(read_words('participle_verse3.txt'))
    subject_word = random.choice(read_words('subject_verse3.txt'))
    return participle.title() + ' ' + subject_word + '.'
def verse41():
    """Return a verse-4 line: "<Noun> <preposition> <noun> <verb>.".

    * The first noun comes from ``noun_verse41_1.txt``.
    * The preposition comes from ``prepositions.txt``; 'в', 'к' and 'с' are
      never used here.  They are filtered out up front instead of being
      re-drawn in a loop, so the function cannot spin forever when only
      excluded prepositions are available.
    * The second noun is read from the ``noun_verse41_N.txt`` file that
      matches the preposition.
    * The verb file depends on the first noun (``verb_verse41_1.txt`` for the
      listed nouns, ``verb_verse41_2.txt`` otherwise).

    Raises:
        IndexError: if a needed word file is empty or no usable preposition
            remains.
    """
    def read_words(path):
        # Every whitespace-separated token of the file is a candidate.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read().split()

    noun1 = random.choice(read_words('noun_verse41_1.txt'))

    usable_preps = [p for p in read_words('prepositions.txt')
                    if p not in ('в', 'к', 'с')]
    prep = random.choice(usable_preps)

    if prep in ('под', 'над'):
        noun_file = 'noun_verse41_2.txt'
    elif prep in ('у', 'от', 'из'):
        noun_file = 'noun_verse41_3.txt'
    elif prep == 'при':
        noun_file = 'noun_verse41_4.txt'
    elif prep == 'на':
        noun_file = 'noun_verse41_5.txt'
    else:
        noun_file = 'noun_verse41_6.txt'
    noun2 = random.choice(read_words(noun_file))

    # Presumably the feminine nouns, which need the verb forms of file 1.
    first_file_nouns = ('дрожь', 'ночь', 'сталь', 'тень', 'кровь', 'плеть')
    if noun1 in first_file_nouns:
        verb_file = 'verb_verse41_1.txt'
    else:
        verb_file = 'verb_verse41_2.txt'
    verb1 = random.choice(read_words(verb_file))

    return noun1.title() + ' ' + prep + ' ' + noun2 + ' ' + verb1 + '.'
def noun42():
    """Return a random word from ``object_verse42.txt``.

    All whitespace-separated words of the file are candidates; blank lines
    are ignored.

    Raises:
        IndexError: if the file contains no words.
    """
    with open('object_verse42.txt', 'r', encoding='utf-8') as f:
        nouns = f.read().split()
    return random.choice(nouns)
def the_end_of_the_line():
    """Return the closing fragment of a verse-4 line: "<Preposition> <word>".

    The preposition pool is ``prepositions.txt`` without 'под', 'у', 'от',
    'по' and 'из', plus the three variants 'во', 'со' and 'ко'.  The second
    word depends on the preposition:

    * 'со' is always followed by 'мной';
    * 'во', 'ко', 'в', 'с', 'к' each have their own ``noun_verse42_N.txt``;
    * 'при'/'на' use ``noun_verse42_3.txt`` and every other preposition uses
      ``noun_verse42_7.txt``; in both cases one pronoun form ('ней' or 'нем')
      is excluded depending on a noun drawn with ``noun42()``.

    Fixes over the previous revision: all three extra prepositions are added
    (``'во' and 'со' and 'ко'`` evaluated to just 'ко'), the noun check tests
    membership in the whole list (``== ('плач' or ...)`` compared only with
    'плач'), the 'со' branch now sets the value that is returned, missing
    prepositions no longer raise ``ValueError``, and forbidden words are
    filtered out instead of being re-drawn in a potentially endless loop.

    NOTE(review): ``noun42()`` is drawn afresh here, so it is not necessarily
    the noun the caller put into the same line - confirm whether the caller
    should pass its noun in.

    Raises:
        IndexError: if a needed word file offers no usable candidate.
    """
    def read_words(path):
        # Every whitespace-separated token of the file is a candidate.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read().split()

    dropped = ('под', 'у', 'от', 'по', 'из')
    preps = [p for p in read_words('prepositions.txt') if p not in dropped]
    preps.extend(['во', 'со', 'ко'])
    prep = random.choice(preps)

    # Prepositions whose companion word comes straight from one file.
    direct_files = {
        'во': 'noun_verse42_1.txt',
        'ко': 'noun_verse42_2.txt',
        'в': 'noun_verse42_4.txt',
        'с': 'noun_verse42_5.txt',
        'к': 'noun_verse42_6.txt',
    }
    # Presumably the masculine nouns: for them 'ней' must not be used,
    # for all others 'нем' must not be used.
    masculine_like = ('плач', 'крик', 'стон', 'зов', 'стан',
                      'взгляд', 'прах', 'плен', 'хлад')

    if prep == 'со':
        noun2 = 'мной'
    elif prep in direct_files:
        noun2 = random.choice(read_words(direct_files[prep]))
    else:
        if prep in ('при', 'на'):
            source = 'noun_verse42_3.txt'
        else:
            source = 'noun_verse42_7.txt'
        forbidden = 'ней' if noun42() in masculine_like else 'нем'
        candidates = [w for w in read_words(source) if w != forbidden]
        noun2 = random.choice(candidates)

    return prep.title() + ' ' + noun2
def verse42():
    """Return a verse-4 line built from a feeling verb, a noun and a pronoun.

    Layout: "<Verb> <noun> <pronoun>... <ending>", where the verb comes from
    ``verb_feel()``, the noun from ``noun42()``, the pronoun from
    ``pronoun_verse4.txt`` and the ending from ``the_end_of_the_line()``.

    The pronoun is drawn from all words of its file, so blank or additional
    lines do not break the draw.

    Raises:
        IndexError: if ``pronoun_verse4.txt`` contains no words.
    """
    with open('pronoun_verse4.txt', 'r', encoding='utf-8') as f:
        pronouns = f.read().split()
    pronoun = random.choice(pronouns)
    return (verb_feel().title() + ' ' + noun42() + ' ' + pronoun
            + '... ' + the_end_of_the_line())
def verse51():
    """Return a verse-5 line: "<Pronoun> <adjective> <noun>.".

    The pronoun (from ``pronoun_verse5.txt``) selects which adjective and
    noun files are used:

    * 'вся' / 'та'   -> ``adjective_verse5_f_4.txt`` + ``noun_verse5_f.txt``
    * 'весь' / 'тот' -> ``adjective_verse5_m_3.txt`` + ``noun_verse5_m.txt``
    * anything else  -> ``adjective_verse5_f_3.txt`` + ``noun_verse5_f.txt``

    Words are drawn from all lines of each file, so blank or additional
    lines do not break the draw.

    Raises:
        IndexError: if a needed word file contains no words.
    """
    def read_words(path):
        # Every whitespace-separated token of the file is a candidate.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read().split()

    pronoun = random.choice(read_words('pronoun_verse5.txt'))
    if pronoun in ('вся', 'та'):
        adjective_file, noun_file = 'adjective_verse5_f_4.txt', 'noun_verse5_f.txt'
    elif pronoun in ('весь', 'тот'):
        adjective_file, noun_file = 'adjective_verse5_m_3.txt', 'noun_verse5_m.txt'
    else:
        adjective_file, noun_file = 'adjective_verse5_f_3.txt', 'noun_verse5_f.txt'

    adj = random.choice(read_words(adjective_file))
    noun1 = random.choice(read_words(noun_file))
    return pronoun.title() + ' ' + adj + ' ' + noun1 + '.'
def verse52():
    """Return a verse-5 line: "<Parenthesis> <noun> <verb>?!".

    The opening word comes from ``parenthesis_verse5.txt`` and the noun from
    ``noun_verse52.txt``.  For 'звезда', 'вуаль' and 'туман' the verb is read
    from ``verb_verse52_sg.txt``; for any other noun from
    ``verb_verse52_pl.txt``.

    Words are drawn from all lines of each file, so blank or additional
    lines do not break the draw.

    Raises:
        IndexError: if a needed word file contains no words.
    """
    def read_words(path):
        # Every whitespace-separated token of the file is a candidate.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read().split()

    parenthesis = random.choice(read_words('parenthesis_verse5.txt'))
    noun1 = random.choice(read_words('noun_verse52.txt'))
    if noun1 in ('звезда', 'вуаль', 'туман'):
        verb_file = 'verb_verse52_sg.txt'
    else:
        verb_file = 'verb_verse52_pl.txt'
    verb1 = random.choice(read_words(verb_file))
    return parenthesis.title() + ' ' + noun1 + ' ' + verb1 + '?!'
def poem():
    """Compose a five-line poem from randomly chosen verse generators.

    One of six layouts is picked first.  Each layout offers one or more
    alternatives for the first two verses (a second random draw selects among
    them when there is more than one) followed by a fixed choice of
    generators for verses three to five.  The generated lines are joined with
    newlines.
    """
    # The four combinations of {verse11, verse12} x {verse22, verse23},
    # listed in the order the second draw selects them.
    four_way = [(verse11, verse22), (verse12, verse22),
                (verse11, verse23), (verse12, verse23)]
    # layout number -> (alternatives for verses 1-2, generators for verses 3-5)
    layouts = {
        1: ([(verse11, verse21), (verse12, verse21)], (verse31, verse41, verse52)),
        2: ([(verse13, verse22), (verse13, verse23)], (verse32, verse42, verse51)),
        3: (four_way, (verse32, verse41, verse52)),
        4: ([(verse13, verse21)], (verse31, verse41, verse52)),
        5: ([(verse13, verse22), (verse13, verse23)], (verse32, verse41, verse52)),
        6: (four_way, (verse32, verse42, verse51)),
    }

    variant = random.choice([1, 2, 3, 4, 5, 6])
    openings, closing = layouts[variant]
    if len(openings) == 1:
        # Layout 4 has a single opening, so no second draw is made.
        opening = openings[0]
    else:
        opening = random.choice(openings)

    generated = [make_line() for make_line in opening + closing]
    return '\n'.join(generated)
# Generate one poem and print it.
print (poem())
import re
def open_file():
    """Return the full contents of 'Птицы.html', decoded as UTF-8."""
    with open('Птицы.html', encoding='utf-8') as source:
        return source.read()
def sub_word():
    """Return the text of ``open_file()`` with the bird word swapped for fish.

    Two passes are made: one for the lower-case stem and one for the
    capitalised stem.  The matched ending (group 1) is kept after the new
    stem.
    """
    lowercase_form = '\\bпти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b'
    capitalised_form = '\\bПти́?ц(((а(х|ми?)?)|ей?|ы|у)?)\\b'
    text = open_file()
    text = re.sub(lowercase_form, 'рыб\\1', text)
    text = re.sub(capitalised_form, 'Рыб\\1', text)
    return text
def add_file():
    """Write the result of ``sub_word()`` to 'Замена.html'.

    Returns the file object used for writing; it is already closed when the
    function returns.
    """
    with open('Замена.html', 'w', encoding='utf-8') as target:
        # sub_word() is evaluated after the output file has been opened.
        replaced = sub_word()
        target.write(replaced)
    return target


# Run the replacement once.
add_file()
def data(year, month, day):
    """Tell whether ``day``/``month``/``year`` is a real calendar date.

    Args:
        year: year number.
        month: month number, valid range 1-12.
        day: day of the month, valid range 1-31 (less for shorter months).

    Returns:
        ``True`` for an existing date and ``False`` for an impossible one.
        For 16.12.1998 a greeting is printed and ``None`` is returned
        (kept for compatibility with existing callers).

    Fixes over the previous revision: the 31st is accepted for 31-day months
    (``day >= 31`` used to reject it), century years follow the ``% 400``
    leap-year rule instead of ``% 1000``, and months or days below 1 are
    rejected.
    """
    if month < 1 or month > 12 or day < 1 or day > 31:
        return False
    if day == 31 and month in (2, 4, 6, 9, 11):
        return False
    if day == 30 and month == 2:
        return False

    is_leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    if day == 29 and month == 2 and not is_leap:
        return False

    if day == 16 and month == 12 and year == 1998:
        print ("Вы угадали день рождения разработчика! Не забудьте его поздравить :)")
        return None
    return True
# Interactive loop: keep asking for dates until any of the three answers is
# left empty.
year = input ("Введите год (натуральное число): ")
month = input ("Введите месяц (натуральное число до 12 включительно): ")
day = input ("Введите день (натуральное число до 31 включительно): ")
while year and month and day:
    try:
        # Call data() exactly once per date: calling it for every comparison
        # repeated its printed message and then printed a stray "None".
        verdict = data (int(year), int(month), int(day))
    except ValueError:
        # Non-numeric input cannot be a date.
        verdict = False
    if verdict is True:
        print ("Такая дата есть в календаре:)")
    elif verdict is False:
        print ("Простите, но такой даты нету:(")
    # Any other result means data() has already printed its own message.
    print ("Попробуем снова:)")
    year = input ("Введите год (натуральное число): ")
    month = input ("Введите месяц (натуральное число до 12 включительно): ")
    day = input ("Введите день (натуральное число до 31 включительно): ")
print ("Все!:)")
# Read three integers and print the sum of their halves, each rounded up.
a = int(input())
b = int(input())
c = int(input())
s = sum((value + 1) // 2 for value in (a, b, c))
print(s)
# Scratch walk-through of common os / os.path / shutil calls.
# Several statements only evaluate an expression and discard the result.
print(os.path.abspath('.'))
print(os.getcwd())
os.path.join('texts', '1.txt')
os.path.exists('texts')
print(os.listdir('.'))

s = 'hello'
i = 1
texts = [entry for entry in os.listdir('.') if entry.endswith('.txt')]
print(texts)

# Append 'hello' to every .txt file, one more repetition per file.
for f in os.listdir('.'):
    if not f.endswith('.txt'):
        continue
    with open(f, 'a', encoding='utf-8') as w:
        w.write(s * i)
    i += 1

# Creating, renaming, copying and deleting files and folders.
os.mkdir('corpus1')
os.makedirs('a\\b\\long\\long')
os.rename('texts\\1.txt', 'texts\\2.txt')
os.path.isfile(r'texts\corpus1.txt')
os.path.isdir(r'texts')
shutil.copy(r'texts\2.txt', r'new_corpus\2.txt')
shutil.move('откуда', 'куда')
shutil.copytree('папка', 'папка2')
os.remove(r'new_corpus\2.txt')
shutil.rmtree('corpus')
def align_right(arr):
    """Print every item of *arr* on its own line, right-aligned to width 40."""
    for item in arr:
        print(format(item, '>40'))


arr = ['abba', 'assa', 'adda', 'affa']
align_right(arr)
def tokenize(text):
    """Split *text* on whitespace and normalise every token.

    Each token has leading/trailing punctuation from the set
    ``.,?!":;*()-—`` (and spaces) removed and is then lower-cased.  Tokens
    that consist only of such characters become empty strings.
    """
    return [chunk.strip('.,?!":;*()-— ').lower() for chunk in text.split()]
# Sample text used to demonstrate tokenize(); the resulting token list is printed.
text = 'Инициатива публикации лучших дисциплин исходила в том числе от Студсовета. Чуть ранее представители Студенческого совета получили возможность использовать результаты СОП при обсуждении возникающих проблем и спорных моментов. Теперь все студенты смогут использовать опубликованную информацию — агрегированное мнение своих предшественников — при формировании собственной индивидуальной образовательной траектории.'
print(tokenize(text))
def tabulate(a):
    """Print the rows of *a* as three 10-character columns.

    For every row the first item is left-aligned, the second centred and the
    third right-aligned; further items of a row are ignored.
    """
    row_layout = '{:<10}{:^10}{:>10}'
    for row in a:
        print(row_layout.format(row[0], row[1], row[2]))


a = [('кошки','собаки','коровы'), ('мяу','гав','му'), (3,3,2)]
tabulate(a)
# Read an integer and print its sign: 1, -1 or 0.
x = int(input())
# Subtracting the two comparisons yields 1 for positive, -1 for negative
# and 0 for zero.
sign = (x > 0) - (x < 0)
print(sign)
# Read two integers and print the smaller one.
a = int(input())
b = int(input())
print(a if a < b else b)
# Read an integer, echo it, then evaluate x * 55 / 100 + 33 and print it.
x = int(input('введите целое число x = '))
print('вы ввели число', x)
scaled = x * 55 / 100
res = scaled + 33
print('результат вычислений x * 55 / 100 + 33 =', res)
# Read the two legs of a right triangle and print half of their product.
a = int(input('введите длину первого катета a = '))
b = int(input('введите длину второго катета b = '))
legs_product = a * b
S = legs_product / 2
print(S)
import re
def func1(regw, word1):
    """Ask the user for a word and report whether it matches *regw*.

    Args:
        regw: regular expression describing the accepted word forms.
        word1: the word that is named in the returned message.

    Returns:
        A message saying whether the entered word is a form of *word1*.
    """
    candidate = input('Введите слово: ')
    if re.search(regw, candidate) is None:
        return 'Данное слово не является формой слова ' + word1
    return 'Данное слово является формой слова ' + word1
# The word being looked for and the pattern covering its inflected forms.
word1 = 'свобода'
regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b'


def if_any(s, regw):
    """Count the whitespace-separated words of *s* that match *regw*.

    Args:
        s: text to inspect.
        regw: regular expression applied to every word with ``re.search``.

    Returns:
        A message stating how many words matched.

    The previous revision rebound the result list to ``None``
    (``p = p.append(...)``) and concatenated an ``int`` to a ``str``, so it
    always raised instead of returning.
    """
    matching = [token for token in s.split() if re.search(regw, token)]
    return 'Слово встречается в тексте ' + str(len(matching)) + ' раз'


s = 'Свободу попугаям!'
print(if_any(s, regw))
import re
import os
import shutil
import re
def make_folders_sent(s):
    """Create a chain of nested folders, one per word of the sentence *s*.

    The first word becomes the outermost folder, the last word the innermost
    one.  Path components are joined with ``os.path.join`` so the nesting
    works on every platform (a hard-coded backslash only nests on Windows).
    An empty or whitespace-only sentence creates nothing.

    Raises:
        FileExistsError: if the innermost folder already exists.
    """
    words = s.split()
    if not words:
        return
    os.makedirs(os.path.join(*words))
# Ask for a sentence and build the nested folders from its words.
s = input('Пожалуйста, введите предложение (без знаков препинания!) \n')
make_folders_sent(s)
def make_folders_num(n):
    """Create folders '1' .. 'n'; folder i receives files '1.txt' .. 'i.txt'.

    Every file contains the text 'Hello!'.  Files are written inside a
    ``with`` block so each handle is closed right away (they used to be left
    open), and paths are built with ``os.path.join`` instead of a hard-coded
    backslash.

    Raises:
        FileExistsError: if one of the folders already exists.
    """
    for i in range(1, n + 1):
        folder = str(i)
        os.mkdir(folder)
        for a in range(i):
            name = os.path.join(folder, str(a + 1) + '.txt')
            with open(name, 'w', encoding='utf-8') as file:
                file.write('Hello!')
# Ask for a number and create that many numbered folders with their files.
n = int(input('Пожалуйста, введите натуральное число \n'))
make_folders_num(n)
def count():
    """Print how many files of each extension the current directory holds.

    The extension is the text after the last dot of the file name (the whole
    name when there is no dot).  One line is printed per extension, with the
    extension and its count each centred in a 10-character column.
    """
    tally = {}
    for entry in os.listdir('.'):
        if not os.path.isfile(entry):
            continue
        ext = entry.rsplit('.', 1)[-1]
        tally[ext] = tally.get(ext, 0) + 1
    for ext, total in tally.items():
        print('{:^10}'.format(ext) + '{:^10}'.format(total))


count()
# Ask five questions and store the answers, one per line, in information.txt
# below a header line.
name = input('Введите ваше имя: ')
age = input('Сколько вам лет? ')
colour = input('Какой ваш любимый цвет? ')
music = input('Кто ваш любимый музыкальный исполнитель? ')
dream = input('Какова ваша заветная мечта? ')
with open('information.txt', 'w', encoding='utf-8') as f:
    f.write('Информация о соседе\n')
    f.write('\n'.join([name, age, colour, music, dream]))
# Read the novel and print all of its whitespace-separated words as one list.
with open('Austen_Jane_Pride_and_Prejudice.txt', 'r', encoding='utf-8') as f:
    text = f.readlines()
list_ = [token for row in text for token in row.split()]
print(list_)
import re
# Print every line of freq.txt that contains the substring 'союз'.
with open('freq.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
conjunction_lines = [entry for entry in lines if 'союз' in entry]
for entry in conjunction_lines:
    print(entry)
# For every freq.txt line whose fields include both 'жен' and 'ед', print the
# first field and remember the last one; finally print the sum of the
# remembered values.
with open('freq.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
a = []
for fields in (entry.split() for entry in lines):
    if 'жен' in fields and 'ед' in fields:
        print(fields[0], end=', ')
        a.append(fields[-1])
ipm_sum = sum(float(value) for value in a)
print(ipm_sum)
# Look words up in freq.txt until an empty line is entered.  For every line
# that has the word among its fields, print the fields between the second and
# the second-to-last one, followed by the last field as the IPM value.
with open('freq.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
word = input()
while word:
    for fields in map(str.split, lines):
        if word not in fields:
            continue
        print('Морфологическая информация: ' + ' '.join(fields[2:-2]))
        print('IPM = ' + fields[-1])
    word = input()
import random
# Word quiz: every line of words.txt is "<word> <hint>".  The lines are asked
# in random order and the share of correct answers is written to scores.txt.
with open('words.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
random.shuffle(lines)
score = 0
asked = 0
for line in lines:
    line = line.strip()
    if ' ' not in line:
        # Blank lines and lines without a hint cannot be asked; they used to
        # crash the unpacking below.
        continue
    word, hint = line.split(' ', 1)
    asked += 1
    response = input('Какое слово я загадала?\n ' + 'Подсказка: ' + hint + ' ')
    if response == word:
        print('Правильно, молодец!')
        score += 1
    else:
        print('А вот и нет, слово было ', word)
with open('scores.txt', 'w', encoding='utf-8') as n:
    # Percentage of the questions actually asked (previously hard-coded to 5).
    percent = score / asked * 100 if asked else 0.0
    n.write('Вот результат\n')
    n.write(str(percent) + '%')
import re
def func1(regw, word1):
    """Ask the user for a word and report whether it matches *regw*.

    Args:
        regw: regular expression describing the accepted word forms.
        word1: the word that is named in the returned message.

    Returns:
        A message saying whether the entered word is a form of *word1*.
    """
    candidate = input('Введите слово: ')
    if re.search(regw, candidate) is None:
        return 'Данное слово не является формой слова ' + word1
    return 'Данное слово является формой слова ' + word1
# The word being looked for and the pattern covering its inflected forms.
word1 = 'свобода'
regw = r'\b(с|С)вобод(ы|е|у|ой|а((ми?)|х)?)\b'


def if_any(s, regw):
    """Count the whitespace-separated words of *s* that match *regw*.

    Args:
        s: text to inspect.
        regw: regular expression applied to every word with ``re.search``.

    Returns:
        A message stating how many words matched.

    The previous revision rebound the result list to ``None``
    (``p = p.append(...)``) and concatenated an ``int`` to a ``str``, so it
    always raised instead of returning.
    """
    matching = [token for token in s.split() if re.search(regw, token)]
    return 'Слово встречается в тексте ' + str(len(matching)) + ' раз'


s = 'Свободу попугаям!'
print(if_any(s, regw))
import re
def opentext(a):
    """Return the whole contents of the file named *a*, decoded as UTF-8."""
    with open(a, encoding='utf-8') as source:
        return source.read()
def delete_tags():
    """Return the text of the file named by the global ``name`` without tags.

    Everything from a '<' up to the nearest following '>' is removed; the
    match may span several lines.
    """
    tag = re.compile('<.*?>', re.DOTALL)
    return tag.sub('', opentext(name))
def delete_odd():
    """Return ``delete_tags()`` with every whitespace run cut to one character.

    The character that is kept is the last one of the run.
    """
    without_tags = delete_tags()
    return re.sub('(\\s)+', '\\1', without_tags)
# Ask which file to clean; delete_tags() reads this global.  Then print the
# cleaned text.
name = input('Введите название файла: ')
print (delete_odd())
# Convert a number of minutes into the hour and minute shown on a 24-hour
# clock and print both.
n = int(input())
hour, minute = divmod(n, 60)
if hour >= 24:
    # Drop whole days.
    k = hour // 24
    hour -= k * 24
print(hour, minute)
import re
def opentext(a):
    """Return the whole contents of the file named *a*, decoded as UTF-8."""
    with open(a, encoding='utf-8') as source:
        return source.read()
def find_all_links():
    """Return all matches of the link pattern in the file named by global ``a``.

    NOTE(review): the pattern below is a bare lazy group, which matches the
    empty string at every position - presumably the surrounding markup was
    lost from the pattern; confirm the intended expression.
    """
    link_pattern = re.compile(r'(.*?)')
    return link_pattern.findall(opentext(a))
# File to analyse; read by find_all_links() and pictures().
a = input('Введите название файла: ')


def pictures():
    """Return all matches of the caption pattern in the file named by ``a``.

    NOTE(review): the pattern is a lazy group followed by a space, so each
    match is the text up to the next space - presumably markup was lost from
    the pattern; confirm the intended expression.
    """
    caption_pattern = re.compile(r'(.*?) ')
    return caption_pattern.findall(opentext(a))


# The function name is rebound to its result, as in the original script.
pictures = pictures()
print('Подписи к картинкам: ')
for picture in pictures:
    # Prints the third element/character of every match.
    print(picture[2])
def opentext(fname):
    """Return every word of the UTF-8 file *fname*, normalised.

    Words are whitespace-separated; each one is lower-cased and stripped of
    leading/trailing characters from ``.,?!";:"*()``.

    The previous revision reset its result list for every line, so only the
    words of the last line were returned, and it failed on an empty file.
    An empty file now yields an empty list.
    """
    with open(fname, 'r', encoding='utf-8') as f:
        raw_words = f.read().split()
    return [word.lower().strip('.,?!";:"*()') for word in raw_words]
def first_letter(letter):
    """Ask for a file name and return its words that start with *letter*.

    The words are those produced by ``opentext`` for the entered file.
    """
    fname = input('введите название файла: ')
    return [word for word in opentext(fname) if word.startswith(letter)]
def questions():
    """Ask for a letter and a number, then return the matching words.

    The result contains the words returned by ``first_letter`` for the
    entered letter that are longer than the entered number.
    """
    letter = input('введите первую букву: ')
    number = int(input('введите число: '))
    candidates = first_letter(letter)
    return [word for word in candidates if len(word) > number]


print(questions())
# Read three integers and print their sum.
a = int(input('введите первое число '))
b = int(input('введите второе число '))
c = int(input('введите третье число '))
s = sum((a, b, c))
print(s)
import os
def drawtree():
    """Print the folder tree below a fixed Windows path.

    Every folder is printed as '--<name>' followed by an empty line, indented
    with one tab per backslash in its path; its files follow, indented one
    tab deeper.
    """
    for root, dirs, files in os.walk('C:\\Users\\1\\Documents\\ниу вшэ'):
        depth = root.count('\\')
        folder_name = root.rpartition('\\')[2]
        print('\t' * depth + '--' + folder_name + '\n')
        file_indent = '\t' * (depth + 1)
        for f in files:
            print(file_indent + f)


drawtree()
# Greet the user by the entered name.
name = input()
greeting = 'Hello, ' + name + '!'
print(greeting)
import re
def opentext(a):
    """Return the whole contents of the file named *a*, decoded as UTF-8."""
    with open(a, encoding='utf-8') as source:
        return source.read()
def delete_tags():
    """Return the text of the file named by the global ``name`` without tags.

    Everything from a '<' up to the nearest following '>' is removed; the
    match may span several lines.
    """
    tag = re.compile('<.*?>', re.DOTALL)
    return tag.sub('', opentext(name))
def delete_odd():
    """Return ``delete_tags()`` with every whitespace run cut to one character.

    The character that is kept is the last one of the run.
    """
    without_tags = delete_tags()
    return re.sub('(\\s)+', '\\1', without_tags)
# Ask which file to clean; delete_tags() reads this global.  Then print the
# cleaned text.
name = input('Введите название файла: ')
print (delete_odd())
import re
def opentext(a):
    """Return the whole contents of the file named *a*, decoded as UTF-8."""
    with open(a, encoding='utf-8') as source:
        return source.read()
def find_all_links():
    """Return all matches of the link pattern in the file named by global ``a``.

    NOTE(review): the pattern below is a bare lazy group, which matches the
    empty string at every position - presumably the surrounding markup was
    lost from the pattern; confirm the intended expression.
    """
    link_pattern = re.compile(r'(.*?)')
    return link_pattern.findall(opentext(a))
# File to analyse; read by find_all_links() and pictures().
a = input('Введите название файла: ')


def pictures():
    """Return all matches of the caption pattern in the file named by ``a``.

    NOTE(review): the pattern is a lazy group followed by a space, so each
    match is the text up to the next space - presumably markup was lost from
    the pattern; confirm the intended expression.
    """
    caption_pattern = re.compile(r'(.*?) ')
    return caption_pattern.findall(opentext(a))


# The function name is rebound to its result, as in the original script.
pictures = pictures()
print('Подписи к картинкам: ')
for picture in pictures:
    # Prints the third element/character of every match.
    print(picture[2])
# Print the largest power of two that does not exceed the entered number
# (1 when the number is smaller than 2).
num = int(input())
if num >= 1:
    # The highest set bit of num is exactly that power of two.
    t = 1 << (num.bit_length() - 1)
else:
    t = 1
print(t)
def opentext(fname):
    """Return every word of the UTF-8 file *fname*, normalised.

    Words are whitespace-separated; each one is lower-cased and stripped of
    leading/trailing characters from ``.,?!";:"*()``.
    """
    with open(fname, 'r', encoding='utf-8') as f:
        rows = f.readlines()
    return [
        token.lower().strip('.,?!";:"*()')
        for row in rows
        for token in row.split()
    ]
def un_forms():
    """Return the words of the file named by global ``fname`` starting with 'un'."""
    return [word for word in opentext(fname) if word.startswith('un')]
def quantity():
    """Return how many words ``un_forms()`` finds."""
    return len(un_forms())
def percentage(number):
    """Return the percentage of ``un_forms()`` words longer than *number*.

    Args:
        number: length that a word must exceed to be counted.

    Returns:
        The share of counted words in percent, or ``0.0`` when
        ``un_forms()`` finds no words at all (this case used to raise
        ``ZeroDivisionError``).
    """
    words = un_forms()
    if not words:
        return 0.0
    longer = sum(1 for word in words if len(word) > number)
    return longer / len(words) * 100
# Ask for the file (read by un_forms() through the global fname) and for the
# length threshold, then print the count and the percentage.
fname = input('Введите название файла: ')
number = int(input('Введите число: '))
print ('Количество слов с приставкой un- равно ', quantity())
print ('Процент слов с приставкой un- длинее ', number, ' равен ', percentage(number))
import os
import shutil
import re
def all_files():
    """Return the names of the files in the current directory, sans extension.

    The extension (text after the last dot) is dropped unless it consists of
    digits or contains whitespace - such a "dot suffix" is treated as part of
    the name.  Names without a dot, or with nothing before the dot, are kept
    unchanged.

    The previous revision indexed the re-joined name string, which yielded
    only its first character for names with a numeric or spaced suffix.
    """
    file_names = []
    for f in os.listdir('.'):
        if not os.path.isfile(f):
            continue
        stem, dot, ext = f.rpartition('.')
        suffix_is_part_of_name = ext.isdigit() or re.search(r'\s', ext) is not None
        if not dot or not stem or suffix_is_part_of_name:
            file_names.append(f)
        else:
            file_names.append(stem)
    return file_names
def all_dirs():
    """Return the names of all folders in the current directory."""
    return [entry for entry in os.listdir('.') if os.path.isdir(entry)]
def all_without_rep():
    """Return file names followed by folder names, without duplicates.

    The first occurrence of every name keeps its position.
    """
    combined = all_files() + all_dirs()
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(combined))
def out_nice():
    """Print a header and then every name from ``all_without_rep()``."""
    unique_names = all_without_rep()
    print('Список папок и файлов в текущей директории: ')
    for entry in unique_names:
        print(entry)
def cyrill_latin_symb_fold():
    """Count folders whose name mixes Latin and Cyrillic letters.

    Folder names come from ``all_dirs()``.  The Cyrillic class includes
    'ё'/'Ё', which lie outside the ``а-я`` / ``А-Я`` ranges and were missed
    before.
    """
    lat = re.compile('[a-zA-Z]')
    cyr = re.compile('[а-яА-ЯёЁ]')
    mixed = [folder for folder in all_dirs()
             if lat.search(folder) and cyr.search(folder)]
    return len(mixed)
# Print the listing, then the number of folders with mixed-alphabet names.
out_nice()
print ('Количество папок, содержащих и латинские, и кириллические символы, равно: ', cyrill_latin_symb_fold())
import random
def file():
    """Load 'dictionary.csv' into a dict.

    Every line is split on ';'; the first field becomes the key and the
    second field (without newline characters at its ends) the value.
    """
    d = {}
    with open('dictionary.csv', 'r', encoding='utf-8') as f:
        for row in f.readlines():
            fields = row.split(';')
            d[fields[0]] = fields[1].strip('\n')
    return d
def right():
    """Return a random line of 'Верные ответы.txt'.

    The file is split on newline characters, so an empty trailing piece is a
    possible result when the file ends with a newline.
    """
    with open('Верные ответы.txt', 'r', encoding='utf-8') as f:
        replies = f.read().split('\n')
    return random.choice(replies)
def wrong():
    """Return a random line of 'Неверные ответы.txt'.

    The file is split on newline characters, so an empty trailing piece is a
    possible result when the file ends with a newline.
    """
    with open('Неверные ответы.txt', 'r', encoding='utf-8') as f:
        replies = f.read().split('\n')
    return random.choice(replies)
def zagadka(d):
    """Play one riddle round with the dictionary *d*.

    A random key is shown as the hint; the entered answer is compared with
    the value stored for that key.

    Returns:
        The text from ``right()`` for a correct answer; otherwise the text
        from ``wrong()`` followed by the correct answer.
    """
    key = random.choice(list(d))
    print('Подсказка: ' + key + '...')
    answer = input('Введите ответ: ')
    if answer != d[key]:
        return wrong() + ' Верный ответ ' + d[key] + '.'
    return right()
# Load the riddles and keep playing rounds for as long as the answer is 'да'.
d = file()
a = input('Хочешь поиграть? Введи "да" или "нет":)\n')
while True:
    if a != 'да':
        break
    print(zagadka(d))
    a = input('Хочешь сыграть еще раз?:) Введи "да" или "нет"\n')
print('До свидания!')
# Read a, b and c; report whether a / b equals c and whether a ** b equals c.
a = int (input ())
b = int (input ())
c = int (input ())

# Division by zero has no result, so it can never equal c (this input used
# to crash the script).
if b != 0 and a / b == c:
    print (a, "разделить на", b, "равно", c)
else:
    print (a, "разделить на", b, "не равно", c)

try:
    power_matches = a ** b == c
except ZeroDivisionError:
    # 0 raised to a negative power is undefined.
    power_matches = False
if power_matches:
    print (a, "в степени", b, "равно", c)
else:
    print (a, "в степени", b, "не равно", c)
import os
def max_dir():
    """Print the folders below the current directory that hold the most files.

    The whole tree under the current directory is walked.  A header with the
    highest file count is printed first, followed by the absolute path of
    every folder that reaches it.

    The noun after the number now follows the Russian plural rules for every
    count (21 файл, 22 файла, 11 файлов, ...); previously only 1-4 were
    handled correctly.
    """
    counts = {}
    for root, dirs, files in os.walk(os.path.abspath('.')):
        counts[root] = len(files)
    max_v = max(counts.values())

    last_digit, last_two = max_v % 10, max_v % 100
    if last_digit == 1 and last_two != 11:
        noun = 'файл'
    elif last_digit in (2, 3, 4) and last_two not in (12, 13, 14):
        noun = 'файла'
    else:
        noun = 'файлов'
    print('Наибольшее количество файлов (' + str(max_v) + ' ' + noun + ') в директориях: ')

    for folder, amount in counts.items():
        if amount == max_v:
            print(folder)


max_dir()
import re
def open_file(a):
    """Return the whole contents of the file named *a*, decoded as UTF-8."""
    with open(a, encoding='utf-8') as source:
        return source.read()
def find_ISO():
    """Return the ISO 639-3 code found in the article named by global ``a``.

    The code is the three word characters captured by the pattern's only
    group.  The previous revision asked for group 2, which the pattern does
    not have, so every successful match raised ``IndexError``.

    NOTE(review): the pattern expects the code to follow 'ISO 639-3'
    immediately - presumably markup between them was lost; confirm against a
    real article.

    Returns:
        The code, or a fixed message when the article has no match.
    """
    reg = 'ISO 639-3(\\w{3})'
    m = re.search(reg, open_file(a), flags=re.DOTALL)
    if m is None:
        return 'В статье не указано ISO 639-3'
    return m.group(1)
def add_file():
    """Append "<article name>: <ISO code>" on a new line of 'Result.txt'.

    The article name is the global ``a`` and the code comes from
    ``find_ISO()``.  Returns the file object, which is already closed.
    """
    with open('Result.txt', 'a', encoding='utf-8') as result:
        result.write('\n' + a + ': ')
        result.write(find_ISO())
    return result


a = input('Введите название статьи в формате Название.html: ')
add_file()
# Print the entered word with 1, 2, ... characters cut from both ends.
word = input('введите слово: ')
for cut in range(1, len(word)):
    trimmed = word[cut:-cut]
    print(trimmed)
# Guessing game: every line of dict.csv is "<word>:<hint>".
with open('dict.csv', 'r', encoding='utf-8') as f:
    lines = f.readlines()
a = dict()
for line in lines:
    line = line.strip('\n')
    if ':' not in line:
        # Blank or malformed lines used to crash the unpacking below.
        continue
    key, value = line.split(':', 1)
    a[key] = value

for key in a:
    b = input('Угадай слово. Вот подсказка: '+a[key])
    # One retry per letter of the word.
    t = 0
    while b != key and t <= (len(key)-1):
        b = input('Неправильно, попробуй еще раз: ')
        t += 1
    # Report success only for a matching answer: the former while/else
    # printed 'Правильно!' even after the attempts ran out.
    if b == key:
        print('Правильно!')
    else:
        print('Неправильно. Правильный ответ: ' + key)
import os
def greatestway():
    """Return the depth of the deepest folder below the current directory.

    The depth of a folder is the number of path separators in the path that
    ``os.walk('.')`` reports for it, so the current directory itself has
    depth 0.  ``os.sep`` is counted instead of a hard-coded '/', which makes
    the result correct on Windows as well.
    """
    return max(root.count(os.sep)
               for root, dirs, files in os.walk('.', topdown=False))


print(greatestway())
import re
import os
def openfile():
    """Print, for every file below 'news2', its name and a piece count.

    Each file is read as Windows-1251, tags ('<...>') are removed and the
    remaining text is split on '.'; the number of resulting pieces is
    printed after the file name.

    The folder path is built with ``os.path.join`` so that the walk also
    finds the folder on systems where '\\' is not a path separator.
    """
    for root, dirs, files in os.walk(os.path.join('.', 'news2')):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='Windows-1251') as text:
                file_text = text.read()
            file_text = re.sub('<.*?>', '', file_text)
            count = len(file_text.split('.'))
            print(f, ' ', count)
    return
def meta():
    """Write 'table.csv' with one "file author topic" row per news file.

    Every file below 'news2' is read as Windows-1251 and its author and
    topic are taken from group 1 of a match at the start of the text.

    Changes over the previous revision: the table is opened once and the
    header written once (it used to be rewritten for every file), every row
    ends with a newline, and paths are built with ``os.path.join`` instead
    of Windows-only literals.

    NOTE(review): both patterns are empty strings in this revision, so
    ``.group(1)`` raises ``IndexError`` for any file - presumably the
    original expressions were lost; restore them before use.
    """
    author_pattern = ''
    topic_pattern = ''
    rows = []
    for root, dirs, files in os.walk(os.path.join('.', 'news2')):
        for f in files:
            with open(os.path.join(root, f), 'r', encoding='Windows-1251') as text:
                file_text = text.read()
            writer = re.match(author_pattern, file_text).group(1)
            topic = re.match(topic_pattern, file_text).group(1)
            rows.append(f + ' ' + writer + ' ' + topic)

    with open(os.path.join('.', 'table.csv'), 'w', encoding='utf-8') as csv_f:
        heading_string = 'Файл' + ' ' + 'Автор' + ' ' + 'Тема'
        csv_f.write(heading_string + '\n')
        for row in rows:
            csv_f.write(row + '\n')
    return
# Run both reports.  Each function ends with a bare return, so the print
# calls themselves output None.
print(openfile())
print(meta())
import re
def openf():
    """Return the lines of 'F.xml' as a list.

    NOTE(review): the encoding is spelled 'utf=8'; presumably 'utf-8' was
    meant - confirm.
    """
    with open('F.xml', 'r', encoding='utf=8') as source:
        return source.readlines()
def countli():
    """Write the number of lines returned by ``openf()`` to 'result.txt'.

    Returns:
        The name of the file that was written.
    """
    results = 'result.txt'
    linecount = len(openf())
    with open(results, 'w', encoding='utf-8') as out:
        out.write(str(linecount))
    return results
def dicfreq():
    """Count the values captured from the 'lemma' lines of ``openf()``.

    For every line containing 'lemma', group 2 of the pattern match is
    collected.  The distinct values are written to 'keys.txt' (one per line)
    and a dict mapping each value to its number of occurrences is returned.

    NOTE(review): the pattern is an empty string in this revision, so
    ``group(2)`` cannot succeed - presumably the original expression was
    lost; restore it before use.
    """
    types = []
    for row in openf():
        row_text = str(row)
        if 'lemma' in row_text:
            found = re.search(r'', row_text)
            types.append(found.group(2))

    freq = {}
    for value in types:
        freq[value] = freq.get(value, 0) + 1

    with open('keys.txt', 'w', encoding='utf-8') as te:
        te.write('\n'.join(freq.keys()))
    return freq
# Print the name of the line-count file and the frequency dictionary.
print(countli(), dicfreq())
import re
# File to analyse; sentences() reads this global.
fname = input('Введите название файла: ')
def openfile(fname):
    """Return the whole contents of the file *fname*, decoded as UTF-8."""
    with open(fname, encoding='utf-8') as source:
        return source.read()
def sentences():
    """Split the text of the file named by global ``fname`` into sentences.

    The stripped text is cut at runs of '.', '!', '?' or newlines that follow
    a word character and are followed by whitespace.
    """
    boundary = '\\b[.!?\\n]+(?=\\s)'
    stripped = openfile(fname).strip()
    return re.split(boundary, stripped)
def find8():
    """Print every word longer than seven characters with its length.

    The sentences come from ``sentences()``; words are split on single
    spaces and stripped of surrounding punctuation.  Each long word is
    printed followed by its length, right-aligned in a dash-filled field of
    width 10.

    The words of every sentence are reported; the per-sentence list is no
    longer overwritten before it is printed.
    """
    template = '{} {:->10}'
    for sentence in sentences():
        words = [str(w).strip('?!&(),.:;«»\n”“ ') for w in sentence.split(' ')]
        for long_word in (w for w in words if len(w) > 7):
            print(template.format(long_word, len(long_word)))
    return


print(find8())
import re
import os
import shutil
# Module-level accumulators: foldername() appends to both lists.
filename = []
unique = []
name = ''
def numberinf():
    """Count the entries of 'REALEC' whose name still contains a digit.

    Before the check, every dot followed by one or more non-digit characters
    is removed from the name.
    """
    return sum(
        1
        for entry in os.listdir('REALEC')
        if re.search(r'\d', re.sub(r'\.\D+', '', str(entry))) is not None
    )
def foldername():
    """Collect the distinct base names of the entries of 'REALEC'.

    Every entry name has each dot followed by non-digit characters removed
    and is appended to the module-level ``filename`` list; non-empty names
    not yet present are then appended to the module-level ``unique`` list,
    which is returned.
    """
    filename.extend(re.sub(r'\.\D+', '', str(entry))
                    for entry in os.listdir('REALEC'))
    for base in filename:
        if base == '' or base in unique:
            continue
        unique.append(base)
    return unique


print(numberinf(), foldername())
import random
# Load the word lists: the picker functions below index into ``lines`` by
# position and strip each line themselves.  (A former loop that stripped a
# loop variable had no effect on ``lines`` and was removed.)
with open('words.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
def nm1():
nm1 = list()
nm1 = lines[0]
nm1 = nm1.strip()
nm1 = nm1.split(' ')
nm1.remove(nm1[0])
return random.choice(nm1)
def nf1():
nf1 = list()
nf1 = lines[1]
nf1 = nf1.strip()
nf1 = nf1.split(' ')
nf1.remove(nf1[0])
return random.choice(nf1)
def nm2():
nm2 = list()
nm2 = lines[2]
nm2 = nm2.strip()
nm2 = nm2.split(' ')
nm2.remove(nm2[0])
return random.choice(nm2)
def nf2():
nf2 = list()
nf2 = lines[3]
nf2 = nf2.strip()
nf2 = nf2.split(' ')
nf2.remove(nf2[0])
return random.choice(nf2)
def nm3():
nm3 = list()
nm3 = lines[4]
nm3 = nm3.strip()
nm3 = nm3.split(' ')
nm3.remove(nm3[0])
return random.choice(nm3)
def nf3():
nf3 = list()
nf3 = lines[5]
nf3 = nf3.strip()
nf3 = nf3.split(' ')
nf3.remove(nf3[0])
return random.choice(nf3)
def nm4():
nm4 = list()
nm4 = lines[6]
nm4 = nm4.strip()
nm4 = nm4.split(' ')
nm4.remove(nm4[0])
return random.choice(nm4)
def nf4():
nf4 = list()
nf4 = lines[7]
nf4 = nf4.strip()
nf4 = nf4.split(' ')
nf4.remove(nf4[0])
return random.choice(nf4)
def nm5():
nm5 = list()
nm5 = lines[8]
nm5 = nm5.strip()
nm5 = nm5.split(' ')
nm5.remove(nm5[0])
return random.choice(nm5)
def nf5():
nf5 = list()
nf5 = lines[9]
nf5 = nf5.strip()
nf5 = nf5.split(' ')
nf5.remove(nf5[0])
return random.choice(nf5)
def nm6():
nm6 = list()
nm6 = lines[10]
nm6 = nm6.strip()
nm6 = nm6.split(' ')
nm6.remove(nm6[0])
return random.choice(nm6)
def nf6():
nf6 = list()
nf6 = lines[11]
nf6 = nf6.strip()
nf6 = nf6.split(' ')
nf6.remove(nf6[0])
return random.choice(nf6)
def adjm1():
adjm1 = list()
adjm1 = lines[12]
adjm1 = adjm1.strip()
adjm1 = adjm1.split(' ')
adjm1.remove(adjm1[0])
return random.choice(adjm1)
def adjm2():
adjm2 = list()
adjm2 = lines[13]
adjm2 = adjm2.strip()
adjm2 = adjm2.split(' ')
adjm2.remove(adjm2[0])
return random.choice(adjm2)
def adjf2():
adjf2 = list()
adjf2 = lines[14]
adjf2 = adjf2.strip()
adjf2 = adjf2.split(' ')
adjf2.remove(adjf2[0])
return random.choice(adjf2)
def adjm3():
adjm3 = list()
adjm3 = lines[15]
adjm3 = adjm3.strip()
adjm3 = adjm3.split(' ')
adjm3.remove(adjm3[0])
return random.choice(adjm3)
def adjf3():
adjf3 = list()
adjf3 = lines[16]
adjf3 = adjf3.strip()
adjf3 = adjf3.split(' ')
adjf3.remove(adjf3[0])
return random.choice(adjf3)
def adjm4():
adjm4 = list()
adjm4 = lines[17]
adjm4 = adjm4.strip()
adjm4 = adjm4.split(' ')
adjm4.remove(adjm4[0])
return random.choice(adjm4)
def adjf4():
adjf4 = list()
adjf4 = lines[18]
adjf4 = adjf4.strip()
adjf4 = adjf4.split(' ')
adjf4.remove(adjf4[0])
return random.choice(adjf4)
def adjm5():
adjm5 = list()
adjm5 = lines[19]
adjm5 = adjm5.strip()
adjm5 = adjm5.split(' ')
adjm5.remove(adjm5[0])
return random.choice(adjm5)
def adjf5():
adjf5 = list()
adjf5 = lines[20]
adjf5 = adjf5.strip()
adjf5 = adjf5.split(' ')
adjf5.remove(adjf5[0])
return random.choice(adjf5)
def adjm6():
adjm6 = list()
adjm6 = lines[21]
adjm6 = adjm6.strip()
adjm6 = adjm6.split(' ')
adjm6.remove(adjm6[0])
return random.choice(adjm6)
def adjf6():
adjf6 = list()
adjf6 = lines[22]
adjf6 = adjf6.strip()
adjf6 = adjf6.split(' ')
adjf6.remove(adjf6[0])
return random.choice(adjf6)
def v1():
v1 = list()
v1 = lines[23]
v1 = v1.strip()
v1 = v1.split(' ')
v1.remove(v1[0])
return random.choice(v1)
def v2():
v2 = list()
v2 = lines[24]
v2 = v2.strip()
v2 = v2.split(' ')
v2.remove(v2[0])
return random.choice(v2)
def v3():
v3 = list()
v3 = lines[25]
v3 = v3.strip()
v3 = v3.split(' ')
v3.remove(v3[0])
return random.choice(v3)
def v4():
v4 = list()
v4 = lines[26]
v4 = v4.strip()
v4 = v4.split(' ')
v4.remove(v4[0])
return random.choice(v4)
def v5():
v5 = list()
v5 = lines[27]
v5 = v5.strip()
v5 = v5.split(' ')
v5.remove(v5[0])
return random.choice(v5)
def v6():
v6 = list()
v6 = lines[28]
v6 = v6.strip()
v6 = v6.split(' ')
v6.remove(v6[0])
return random.choice(v6)
def partm3():
partm3 = list()
partm3 = lines[29]
partm3 = partm3.strip()
partm3 = partm3.split(' ')
partm3.remove(partm3[0])
return random.choice(partm3)
def partm4():
partm4 = list()
partm4 = lines[30]
partm4 = partm4.strip()
partm4 = partm4.split(' ')
partm4.remove(partm4[0])
return random.choice(partm4)
def partf4():
partf4 = list()
partf4 = lines[31]
partf4 = partf4.strip()
partf4 = partf4.split(' ')
partf4.remove(partf4[0])
return random.choice(partf4)
def partm5():
partm5 = list()
partm5 = lines[32]
partm5 = partm5.strip()
partm5 = partm5.split(' ')
partm5.remove(partm5[0])
return random.choice(partm5)
def partf5():
partf5 = list()
partf5 = lines[33]
partf5 = partf5.strip()
partf5 = partf5.split(' ')
partf5.remove(partf5[0])
return random.choice(partf5)
def partm6():
partm6 = list()
partm6 = lines[34]
partm6 = partm6.strip()
partm6 = partm6.split(' ')
partm6.remove(partm6[0])
return random.choice(partm6)
def partf6():
partf6 = list()
partf6 = lines[35]
partf6 = partf6.strip()
partf6 = partf6.split(' ')
partf6.remove(partf6[0])
return random.choice(partf6)
def conj1():
conj1 = list()
conj1 = lines[36]
conj1 = conj1.strip()
conj1 = conj1.split(' ')
conj1.remove(conj1[0])
return random.choice(conj1)
def conj2():
conj2 = list()
conj2 = lines[37]
conj2 = conj2.strip()
conj2 = conj2.split(' ')
conj2.remove(conj2[0])
return random.choice(conj2)
def numm2():
numm2 = list()
numm2 = lines[38]
numm2 = numm2.strip()
numm2 = numm2.split(' ')
numm2.remove(numm2[0])
return random.choice(numm2)
def numf2():
numf2 = list()
numf2 = lines[39]
numf2 = numf2.strip()
numf2 = numf2.split(' ')
numf2.remove(numf2[0])
return random.choice(numf2)
def numm3():
    """Return a random token from ``lines[40]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[40].strip().split(' ')[1:]
    return random.choice(candidates)
def numf3():
    """Return a random token from ``lines[41]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[41].strip().split(' ')[1:]
    return random.choice(candidates)
def numm4():
    """Return a random token from ``lines[42]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[42].strip().split(' ')[1:]
    return random.choice(candidates)
def numf4():
    """Return a random token from ``lines[43]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[43].strip().split(' ')[1:]
    return random.choice(candidates)
def numm5():
    """Return a random token from ``lines[44]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[44].strip().split(' ')[1:]
    return random.choice(candidates)
def numf5():
    """Return a random token from ``lines[45]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[45].strip().split(' ')[1:]
    return random.choice(candidates)
def adv2():
    """Return a random token from ``lines[46]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[46].strip().split(' ')[1:]
    return random.choice(candidates)
def adv3():
    """Return a random token from ``lines[47]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[47].strip().split(' ')[1:]
    return random.choice(candidates)
def adv4():
    """Return a random token from ``lines[48]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[48].strip().split(' ')[1:]
    return random.choice(candidates)
def adv5():
    """Return a random token from ``lines[49]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[49].strip().split(' ')[1:]
    return random.choice(candidates)
def adv6():
    """Return a random token from ``lines[50]``, ignoring its first token.

    The line is stripped, split on single spaces, and the leading token
    (presumably a row label -- confirm against the data file) is dropped.
    """
    candidates = lines[50].strip().split(' ')[1:]
    return random.choice(candidates)
def random_line_5_1():
    """Build every candidate phrase of this line type and return one at random.

    All candidates are generated eagerly (each word picker is called once
    per appearance, left to right) before a single phrase is selected.

    NOTE(review): the numeric suffixes of the pickers add up to 5 in every
    entry except ``adjf2 + nf2``; if the suffixes are syllable counts this
    entry is one short -- confirm. Two entries are also listed twice.
    """
    glue = ' '.join
    candidates = [
        glue((adjm4(), nm1())),
        glue((adjm3(), nm2())),
        glue((adjm2(), nm3())),
        glue((adjm4(), nm1())),
        glue((adjf4(), nf1())),
        glue((adjf3(), nf2())),
        glue((adjf2(), nf2())),
        glue((adjf4(), nf1())),
        glue((partm3(), nm2())),
        glue((partm4(), nm1())),
        glue((partf4(), nf1())),
        nm5(),
        nf5(),
        glue((numm2(), adjm1(), nm2())),
        glue((numm2(), adjm2(), nm1())),
        glue((numm3(), adjm1(), nm1())),
        glue((numm4(), nm1())),
        glue((numf2(), adjf2(), nf1())),
        glue((numf2(), nf3())),
        glue((numf3(), nf2())),
    ]
    return random.choice(candidates)
def random_line_7_1():
    """Build every modifier + noun candidate of this line type and pick one.

    All candidates are generated eagerly, in listing order, before one of
    them is chosen at random.
    """
    glue = ' '.join
    candidates = [
        glue((adjm6(), nm1())),
        glue((adjm5(), nm2())),
        glue((adjm4(), nm3())),
        glue((adjm3(), nm4())),
        glue((adjm2(), nm5())),
        glue((adjm1(), nm6())),
        glue((adjf6(), nf1())),
        glue((adjf5(), nf2())),
        glue((adjf4(), nf3())),
        glue((adjf3(), nf4())),
        glue((adjf2(), nf5())),
        glue((partm6(), nm1())),
        glue((partm5(), nm2())),
        glue((partm4(), nm3())),
        glue((partm3(), nm4())),
        glue((partf6(), nf1())),
        glue((partf5(), nf2())),
        glue((partf4(), nf3())),
    ]
    return random.choice(candidates)
def random_line_7_2():
    """Return a random phrase ending in a ``conj1`` word.

    Three candidates are built eagerly: ``v6 conj1``, ``adv2 v4 conj1``
    and ``adv3 v3 conj1``.
    """
    glue = ' '.join
    candidates = [
        glue((v6(), conj1())),
        glue((adv2(), v4(), conj1())),
        glue((adv3(), v3(), conj1())),
    ]
    return random.choice(candidates)
def random_line_5_2():
    """Return a random phrase built from ``v*`` words, optionally preceded by an ``adv*`` word.

    Four candidates are built eagerly: ``v5``, ``adv2 v3``, ``adv3 v2``
    and ``adv4 v1``.
    """
    glue = ' '.join
    candidates = [
        v5(),
        glue((adv2(), v3())),
        glue((adv3(), v2())),
        glue((adv4(), v1())),
    ]
    return random.choice(candidates)
def random_line_7_3():
    """Return a random ``adv* v*`` phrase.

    Five candidates are built eagerly, pairing ``adv2``..``adv6`` with
    ``v5``..``v1`` respectively.
    """
    glue = ' '.join
    candidates = [
        glue((adv2(), v5())),
        glue((adv3(), v4())),
        glue((adv4(), v3())),
        glue((adv5(), v2())),
        glue((adv6(), v1())),
    ]
    return random.choice(candidates)
def poem():
    """Assemble four three-line poem variants and return one at random.

    The variants differ in which line generators they use and in the
    punctuation placed before each line break. No punctuation is added
    after the last line.
    """
    build = ''.join
    variants = [
        build((random_line_5_1(), '.\n', random_line_7_1(), '.\n', random_line_5_1())),
        build((random_line_5_1(), '\n', random_line_7_2(), '\n', random_line_5_2())),
        build((random_line_5_1(), '\n', random_line_7_3(), '.\n', random_line_5_1())),
        build((random_line_5_1(), '\n', random_line_7_3(), ',\n', random_line_5_2())),
    ]
    return random.choice(variants)
# Generate one poem and print it followed by a closing full stop.
print(poem()+'.')
# Ask the user for the name of the file to process (the prompt reads
# "Enter the file name: "); the value is later passed to openfile().
fname = input('Введите название файла: ')
def openfile(fname):
    """Read a UTF-8 text file and return its lowercased words.

    The text is split on any run of whitespace. Splitting on a single
    space only (as before) left words separated by newlines or tabs glued
    together and produced empty tokens for repeated spaces.

    :param fname: path of the file to read.
    :return: list of lowercased whitespace-separated tokens.
    :raises OSError: if the file cannot be opened.
    """
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    return text.lower().split()
def count_words(fname):
    """Count the words in the file ``fname``.

    A token counts as a word when something remains after surrounding
    punctuation is stripped. The original strip literal was cut off after
    ``'?!@`` (a syntax error) and its result was discarded;
    ``string.punctuation`` is used as the character set -- TODO confirm it
    covers the intended characters.

    :param fname: path of the file to analyse (read via ``openfile``).
    :return: number of non-empty tokens.
    """
    import string  # local import so the block does not depend on file-level imports

    return sum(1 for word in openfile(fname) if word.strip(string.punctuation))
def dicff(fname):
    """Return a frequency dictionary of the tokens in the file ``fname``.

    Tokens come from ``openfile``; they are visited in sorted order, so
    keys are inserted in that order.
    """
    fr = {}
    for token in sorted(openfile(fname)):
        fr[token] = fr.get(token, 0) + 1
    return fr
# Print the word count followed by the frequency dictionary for the chosen file.
print(count_words(fname), dicff(fname))
import re
# Ask the user for the name of the file to process (prompt: "Enter the file name: ").
fname = input('Введите название файла: ')
def openfile(fname):
    """Read a UTF-8 text file and return its lowercased words.

    The text is split on any run of whitespace. Splitting on a single
    space only (as before) left words separated by newlines or tabs glued
    together and produced empty tokens for repeated spaces.

    :param fname: path of the file to read.
    :return: list of lowercased whitespace-separated tokens.
    :raises OSError: if the file cannot be opened.
    """
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    return text.lower().split()
def words(fname):
    """Return the tokens of the file ``fname`` with surrounding punctuation removed.

    The original strip literal was cut off after ``'?!@`` (a syntax
    error); ``string.punctuation`` is used as the character set -- TODO
    confirm it covers the intended characters. Tokens that consist solely
    of punctuation become empty strings and are kept, as before.

    :param fname: path of the file to analyse (read via ``openfile``).
    :return: list of stripped tokens, one per token returned by ``openfile``.
    """
    import string  # local import so the block does not depend on file-level imports

    return [token.strip(string.punctuation) for token in openfile(fname)]
# Whole-word pattern: the stem "откр" followed by one of the endings
# ыл / ыла / ыли / ою / оют / оешь / оет / оем / оете / ыть.
regex = r'\bоткр(ыл[аи]?|о(ют?|е(шь|т|м|те))|ыть)\b'
def formsearch(regex):
    """Return the tokens of the global ``fname`` file that match ``regex``.

    Every token produced by ``words(fname)`` is tested with ``re.search``;
    the matching tokens are joined with newlines, in file order.
    """
    hits = [
        token
        for token in words(fname)
        if re.search(regex, str(token)) is not None
    ]
    return '\n'.join(hits)
# Print every token that matched the pattern, one per line.
print(formsearch(regex))
# Ask the user for the name of the file to process (prompt: "Enter the file name: ").
fname = input('Введите название файла: ')
def openfile(fname):
    """Read a UTF-8 text file and return its lowercased words.

    The text is split on any run of whitespace. Splitting on a single
    space only (as before) left words separated by newlines or tabs glued
    together and produced empty tokens for repeated spaces.

    :param fname: path of the file to read.
    :return: list of lowercased whitespace-separated tokens.
    :raises OSError: if the file cannot be opened.
    """
    with open(fname, 'r', encoding='utf-8') as f:
        text = f.read()
    return text.lower().split()
def ingform(fname):
    """Return the tokens of the file ``fname`` that end in ``'ing'``.

    Surrounding punctuation is stripped before the suffix test. The
    original strip literal was cut off after ``'?!@`` (a syntax error);
    ``string.punctuation`` is used as the character set -- TODO confirm it
    covers the intended characters.

    :param fname: path of the file to analyse (read via ``openfile``).
    :return: list of stripped tokens ending in ``'ing'``, in file order.
    """
    import string  # local import so the block does not depend on file-level imports

    stripped = (token.strip(string.punctuation) for token in openfile(fname))
    return [word for word in stripped if word.endswith('ing')]
# Ask the user for the word whose occurrences should be counted (prompt: "Enter a word: ").
theword = input('Введите слово: ')
def searching(theword):
    """Count how many entries of ``ingform(fname)`` equal ``theword``.

    ``fname`` is the module-level file name; the comparison is exact.
    """
    return ingform(fname).count(theword)
# Show all tokens ending in 'ing', then how many of them equal the requested word.
print(ingform(fname))
print(searching(theword))
import re
# Ask the user for the name of the file to process (prompt: "Enter the file name: ").
fname = input('Введите название файла: ')
def open_html(fname):
    """Return the full contents of the UTF-8 file ``fname`` as one string."""
    with open(fname, encoding='utf-8') as source:
        return source.read()
# Search the HTML file `fname` for the value attached to the
# data-wikidata-property-id="P36" attribute and return it; returns None
# implicitly when nothing matches.
def find_capital(fname):
text = open_html(fname)
# NOTE(review): the pattern is empty, so this search always succeeds; the
# original expression looks truncated -- confirm against the source script.
card = re.search(r'', text)
if card != None:
# NOTE(review): as written the pattern defines only two groups (both lazy),
# so group(3) below raises IndexError on a match; the markup that used to
# sit between the groups appears to be missing -- confirm.
capital = re.search(r'data-wikidata-property-id="P36"(.*?)(.*?)', text)
if capital != None:
return capital.group(3)
# Search the HTML file `fname` and return the text captured by the pattern's
# first group; returns None implicitly when nothing matches.
def find_country(fname):
text = open_html(fname)
# NOTE(review): the pattern is empty, so this search always succeeds; the
# original expression looks truncated -- confirm against the source script.
card = re.search(r'', text)
if card != None:
# NOTE(review): with nothing after the lazy group it captures an empty
# string; the closing part of the pattern appears to be missing -- confirm.
country = re.search(r'>(.*?)', text)
if country != None:
return country.group(1)
# Print the extracted country and capital (labels read "Country:" and "Capital:").
print('Страна: ', find_country(fname), 'Столица: ', find_capital(fname))
import re
# Ask the user for the name of the file to process (prompt: "Enter the file name: ").
fname = input('Введите название файла: ')
def open_html(fname):
    """Read the UTF-8 file ``fname`` and apply four regex deletions in sequence.

    The first pattern removes ``<...>`` tags. NOTE(review): the two empty
    patterns and the bare ``.*?`` pattern look like remnants of longer
    expressions -- confirm against the original script.
    """
    with open(fname, 'r', encoding='utf-8') as handle:
        cleaned = handle.read()
    # Applied strictly in this order; each pass replaces its matches with ''.
    for pattern in (u'<.*?(".*?")?.*?>', u'', u'', u'.*?'):
        cleaned = re.sub(pattern, u'', cleaned, flags = re.U)
    return cleaned
def changeform(fname):
    """Replace forms of "комар" with the matching forms of "слон" and save the result.

    The text comes from ``open_html(fname)``. The lowercase stem is
    replaced first, then the capitalised one; whatever ending the first
    group captured is re-attached to the new stem. The result is written
    to ``results.txt`` (UTF-8) and a fixed status message is returned.
    """
    substitutions = (
        (u'комар(у|е|ы|а(х|м|ми)?|о(м|в))?', u'слон\\1'),
        (u'Комар(у|е|ы|а(х|м|ми)?|о(м|в))?', u'Слон\\1'),
    )
    converted = open_html(fname)
    for pattern, replacement in substitutions:
        converted = re.sub(pattern, replacement, converted, flags = re.U)
    with open('results.txt', 'w', encoding='utf-8') as out:
        out.write(converted)
    return 'Готово! Результаты в файле results.txt .'
# Run the replacement for the chosen file and print the returned status message.
print(changeform(fname))
import os
import re
from math import log
# Character class of punctuation marks removed from texts before tokenising.
# NOTE(review): the original literal was cut off after '%' (unterminated
# string); it is closed here with '#' and ']' -- confirm whether further
# characters belonged to the class.
punct = r'[.,!«»?&@"$\[\]\(\):;%#]'
# Tab and newline characters; they are replaced with spaces.
tabs = '[\t\n]'
def preprocessing(text):
    """Lowercase ``text``, drop punctuation and return its tokens.

    Characters matched by the module-level ``punct`` pattern are deleted,
    characters matched by ``tabs`` become spaces, and the result is split
    on whitespace.
    """
    lowered = text.lower()
    without_punct = re.sub(punct, '', lowered)
    spaced = re.sub(tabs, ' ', without_punct)
    return spaced.strip().split()
def count_tf(word, text):
    """Return the term frequency of ``word`` in ``text``.

    :param word: item to count.
    :param text: sequence supporting ``count`` and ``len`` (``main`` passes
        a list of tokens).
    :return: occurrences of ``word`` divided by ``len(text)``; ``0`` for an
        empty ``text`` instead of raising ZeroDivisionError.
    """
    if not text:
        return 0
    return text.count(word) / len(text)
def count_df(word, texts):
    """Return the number of items in ``texts`` that contain ``word``."""
    return sum(1 for document in texts if word in document)
def count_idf(word, texts):
    """Return ``len(texts)`` divided by the document frequency of ``word``.

    When no text contains the word the result is ``0`` rather than a
    division error. No logarithm is applied here.
    """
    df = count_df(word, texts)
    if df == 0:
        return 0
    return len(texts) / df
def count_tfidf(word, text, texts):
    """Return the TF-IDF weight of ``word`` in ``text`` relative to ``texts``.

    The weight is ``tf * log10(N / df)``. The previous ``log(tf) * log(idf)``
    gave non-positive scores that do not rank terms by relevance, and it
    raised ValueError whenever ``tf`` or the ratio was 0.

    :param word: token to score.
    :param text: token list of the document being scored.
    :param texts: collection of token lists forming the corpus.
    :return: the weight as a float; ``0.0`` when the ratio is not positive.
    """
    tf = count_tf(word, text)
    idf = count_idf(word, texts)
    if idf <= 0:
        # count_idf yields 0 when no text contains the word; log(0) is undefined.
        return 0.0
    return tf * log(idf, 10)


def keywords(text, texts):
    """Return the highest-scoring tokens of ``text`` with their TF-IDF weights.

    Each distinct token is scored once. The six best tokens are returned
    (the original loop also kept six entries), highest weight first.

    :param text: token list of the document.
    :param texts: collection of token lists forming the corpus.
    :return: dict mapping token to weight, ordered from highest to lowest.
    """
    scores = {}
    for word in text:
        if word not in scores:
            scores[word] = count_tfidf(word, text, texts)
    # Descending order: a larger TF-IDF weight means a more characteristic term.
    ranked = sorted(scores, key=scores.get, reverse=True)
    return {word: scores[word] for word in ranked[:6]}
def main():
    """Extract and print keywords for every file under the ``wikipedia`` directory.

    Each file is read as UTF-8 and tokenised with ``preprocessing``; the
    tokens are stored under the bare file name. Keywords for each text are
    then computed against all collected texts and printed one per line.
    """
    texts = {}
    for root, _dirs, files in os.walk('wikipedia'):
        for name in files:
            with open(os.path.join(root, name), 'r', encoding='utf-8') as handle:
                texts[name] = preprocessing(handle.read())
    corpus = list(texts.values())
    for name, tokens in texts.items():
        print('\nИзвлекаем ключевые слова для текста {}'.format(name))
        for key, score in keywords(tokens, corpus).items():
            print(key, score)
# Run the keyword extraction only when this file is executed as a script.
if __name__ == '__main__':
main()
| |