kertser committed on
Commit
28a6a88
1 Parent(s): cf8e5a9

Upload WarOnline_Chat.py

Browse files
Files changed (1) hide show
  1. WarOnline_Chat.py +10 -3
WarOnline_Chat.py CHANGED
@@ -24,8 +24,17 @@ password = 'naP2tion'
24
  session = requests.Session()
25
 
26
  def fixString(S):
27
- # Substitute multiple commas with a single one
28
  S = re.sub(",+", ",", S)
 
 
 
 
 
 
 
 
 
29
  return S
30
 
31
  def compare_pages(url1, url2):
@@ -67,8 +76,6 @@ def post(message="", thread_url=thread_url, post_url=post_url, quoted_by="",quot
67
 
68
  if quoted_by:
69
  message = f'[QUOTE="{quoted_by}, post: {quote_source}"]{quote_text}[/QUOTE]{message}'
70
- #message = f'[QUOTE="{quoted_by}, data-source=post: {quote_source}"]{quote_text}[/QUOTE]{message}'
71
- # optionally add @{quoted_by} to indent the quoter
72
 
73
  # Retrieve the thread page HTML
74
  response = session.get(thread_url)
 
24
  session = requests.Session()
25
 
26
  def fixString(S):
27
+ # This is a helper function to overcome the bugs of tokenizer
28
  S = re.sub(",+", ",", S)
29
+ S = re.sub("!.", "!", S)
30
+ S = re.sub(".?", "?", S)
31
+ S = re.sub(",!", "!", S)
32
+ S = re.sub(",.", ",", S)
33
+ S = re.sub(".]", ".", S)
34
+ S = re.sub(",\)", ")", S)
35
+ S = re.sub("&", "", S)
36
+ S = re.sub("&", "", S)
37
+ S = re.sub("ен,ицхак", "ен-ицхак", S)
38
  return S
39
 
40
  def compare_pages(url1, url2):
 
76
 
77
  if quoted_by:
78
  message = f'[QUOTE="{quoted_by}, post: {quote_source}"]{quote_text}[/QUOTE]{message}'
 
 
79
 
80
  # Retrieve the thread page HTML
81
  response = session.get(thread_url)