raynardj committed on
Commit
e0a6c5a
·
1 Parent(s): cf33f35

🪁 a decent version

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +78 -53
README.md CHANGED
@@ -11,5 +11,5 @@ pinned: true
11
  ## 随无涯
12
  > 强大的Transformer翻译 + 殆知阁的文言文库
13
 
14
- * 朕亲自下厨的[🤗 翻译模型](https://github.com/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 训练笔记](https://github.com/raynardj/yuan)
15
  * 📚 书籍来自 [殆知阁](http://www.daizhige.org/),只为了便于展示翻译,喜欢请访问网站,书籍[github文件链接](https://github.com/garychowcmu/daizhigev20)
 
11
  ## 随无涯
12
  > 强大的Transformer翻译 + 殆知阁的文言文库
13
 
14
+ * 朕亲自下厨的[🤗 翻译模型](https://huggingface.co/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 训练笔记](https://github.com/raynardj/yuan)
15
  * 📚 书籍来自 [殆知阁](http://www.daizhige.org/),只为了便于展示翻译,喜欢请访问网站,书籍[github文件链接](https://github.com/garychowcmu/daizhigev20)
app.py CHANGED
@@ -8,7 +8,6 @@ import torch
8
 
9
  st.set_page_config(layout="wide")
10
 
11
-
12
  @st.cache(allow_output_mutation=True)
13
  def load_model():
14
  from transformers import (
@@ -53,13 +52,15 @@ def get_file_df():
53
 
54
  file_df = get_file_df()
55
 
56
- st.sidebar.title("【随无涯】")
57
  st.sidebar.markdown("""
58
- * 朕亲自下厨的[🤗 翻译模型](https://github.com/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 训练笔记](https://github.com/raynardj/yuan)
59
- * 📚 书籍来自 [殆知阁](http://www.daizhige.org/),文本的[github api](https://github.com/garychowcmu/daizhigev20)
60
  """)
61
 
62
  c2 = st.container()
 
 
63
  c = st.container()
64
 
65
  USER_ID = st.secrets["USER_ID"]
@@ -84,7 +85,7 @@ def show_file_size(size: int):
84
  else:
85
  return f"{size/1024//1024} MB"
86
 
87
-
88
  def fetch_file(path):
89
  # reading from local path first
90
  if (Path("data")/path).exists():
@@ -104,6 +105,7 @@ def fetch_file(path):
104
  r.raise_for_status()
105
 
106
 
 
107
  def fetch_from_df(sub_paths: str = ""):
108
  sub_df = file_df.copy()
109
  for idx, step in enumerate(sub_paths):
@@ -112,71 +114,94 @@ def fetch_from_df(sub_paths: str = ""):
112
  return None
113
  return list(sub_df[f"col_{len(sub_paths)}"].unique())
114
 
 
 
 
 
115
 
116
- # root_data = fetch_from_github()
117
- if 'pathway' in st.session_state:
118
- pass
119
- else:
120
- st.session_state.pathway = []
121
-
122
- path_text = st.sidebar.text("/".join(st.session_state.pathway))
123
 
 
124
 
125
- def reset_path():
126
- st.session_state.pathway = []
127
- path_text.text(st.session_state.pathway)
128
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- if st.sidebar.button("回到根目录"):
131
- reset_path()
 
 
 
132
 
133
- st.session_state.translating = False
 
 
 
 
 
 
 
 
 
134
 
135
- def display_tree():
136
- sublist = fetch_from_df(st.session_state.pathway)
137
- dropdown = st.sidebar.selectbox("【选书】", options=sublist)
138
- with st.spinner("加载中..."):
139
- st.session_state.pathway.append(dropdown)
140
- if dropdown.endswith('.txt'):
141
- filepath = "/".join(st.session_state.pathway)
142
- file_size = file_size_map[filepath]
143
- with st.spinner(f"loading file:{filepath},({show_file_size(file_size)})"):
144
- # if file size is too large, we will not load it
145
- if file_size > 3*1024*1024:
146
- urlpath = filepath.replace(".txt", ".html")
147
- dzg = f"http://www.daizhige.org/{urlpath}"
148
- st.markdown(f"文件太大,[前往殆知阁页面]({dzg}), 或挑挑其他的书吧")
149
- reset_path()
150
- return None
151
- path_text.text(filepath)
152
- text = fetch_file(filepath)
153
- # create markdown with max heights
154
- c.markdown(
155
- f"""<pre style='max-height:300px;overflow-y:auto'>{text}</pre>""", unsafe_allow_html=True
156
- )
157
- reset_path()
158
 
159
- else:
160
- sub_list = fetch_from_df(
161
- st.session_state.pathway)
162
- path_text.text("/".join(st.session_state.pathway))
163
- display_tree()
164
 
165
- if st.session_state.translating == False:
166
- display_tree()
167
 
168
  def translate_text():
169
  st.session_state.translating = True
170
- if c2.button("【翻译】"):
171
  if cc:
172
  if len(cc) > 168:
173
- c2.write(f"句子太长,最多168个字符")
174
  else:
175
  c2.markdown(f"""```{inference(cc)}```""")
176
  else:
177
- c2.write("请输入文本")
178
  st.session_state.translating = False
179
 
180
- cc = c2.text_area("【输入文本】", height=150)
181
  translate_text()
182
 
 
8
 
9
  st.set_page_config(layout="wide")
10
 
 
11
  @st.cache(allow_output_mutation=True)
12
  def load_model():
13
  from transformers import (
 
52
 
53
  file_df = get_file_df()
54
 
55
+ st.sidebar.title("【隨無涯】")
56
  st.sidebar.markdown("""
57
+ * 朕自庖[🤗 模型](https://huggingface.co/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 訓習處](https://github.com/raynardj/yuan)
58
+ * 📚 充棟汗牛,取自[殆知閣](http://www.daizhige.org/)[github api](https://github.com/garychowcmu/daizhigev20)
59
  """)
60
 
61
  c2 = st.container()
62
+ c2.write("The entirety of ancient Chinese literature, with a modern translator at your side.")
63
+ st.markdown("""---""")
64
  c = st.container()
65
 
66
  USER_ID = st.secrets["USER_ID"]
 
85
  else:
86
  return f"{size/1024//1024} MB"
87
 
88
+ @st.cache(max_entries=100, allow_output_mutation=True)
89
  def fetch_file(path):
90
  # reading from local path first
91
  if (Path("data")/path).exists():
 
105
  r.raise_for_status()
106
 
107
 
108
+ @st.cache(allow_output_mutation=True, max_entries=100)
109
  def fetch_from_df(sub_paths: str = ""):
110
  sub_df = file_df.copy()
111
  for idx, step in enumerate(sub_paths):
 
114
  return None
115
  return list(sub_df[f"col_{len(sub_paths)}"].unique())
116
 
117
def show_filepath(filepath: str):
    """Fetch the file at *filepath* and render it in the main container.

    The content is shown inside a scrollable ``<pre>`` element capped at
    300px height.

    Args:
        filepath: Path of the file, passed unchanged to ``fetch_file``.
    """
    import html

    text = fetch_file(filepath)
    # The markup is emitted with unsafe_allow_html=True, so the file content
    # must be escaped: otherwise any '<' or '&' in the book would be parsed
    # as HTML instead of being displayed literally.
    escaped = html.escape(str(text))
    c.markdown(
        f"""<pre style='max-height:300px;overflow-y:auto'>{escaped}</pre>""",
        unsafe_allow_html=True,
    )
121
 
122
# Sidebar switch between browsing the library by category and searching by
# file name.
# NOTE(review): the block nesting below was reconstructed from a flattened
# diff rendering that had lost its indentation -- confirm against the file.
search_mode = st.sidebar.selectbox(
    label="何以尋跡 How to search",
    options=["以類尋書 category", "書名求書 search"],
)

if search_mode == "以類尋書 category":

    st.session_state.translating = False

    # The pathway holds the directory steps selected so far.
    if 'pathway' not in st.session_state:
        st.session_state.pathway = []

    path_text = st.sidebar.text("/".join(st.session_state.pathway))

    def reset_path():
        """Clear the stored pathway and refresh the sidebar path display."""
        st.session_state.pathway = []
        # Join the (now empty) pathway like every other call site does, so
        # the sidebar shows an empty path rather than the list repr "[]".
        path_text.text("/".join(st.session_state.pathway))

    if st.sidebar.button("還至初錄(back to root)"):
        reset_path()

    def display_tree():
        """Render one sidebar selectbox per level until a ``.txt`` is chosen.

        Each call lists the entries below the current pathway, appends the
        selected entry, and either shows the selected ``.txt`` file or
        recurses one level deeper. Files larger than 3 MiB are not loaded;
        a link to the daizhige.org page is shown instead.
        """
        sublist = fetch_from_df(st.session_state.pathway)
        if not sublist:
            # Nothing to choose from at this level (fetch_from_df can return
            # None); start over rather than failing on the selection below.
            reset_path()
            return
        dropdown = st.sidebar.selectbox("【擇書 choose】", options=sublist)
        with st.spinner("書非借不能讀也..."):
            st.session_state.pathway.append(dropdown)
            if dropdown.endswith('.txt'):
                filepath = "/".join(st.session_state.pathway)
                file_size = file_size_map[filepath]
                with st.spinner(f"Load 載文:{filepath},({show_file_size(file_size)})"):
                    # if file size is too large, we will not load it
                    if file_size > 3 * 1024 * 1024:
                        urlpath = filepath.replace(".txt", ".html")
                        dzg = f"http://www.daizhige.org/{urlpath}"
                        st.markdown(f"File too big 其文碩而難載,不能為之,[往 殆知閣]({dzg}), 或擇他書")
                        reset_path()
                        return
                    path_text.text(filepath)
                    # Reuse the shared renderer instead of duplicating the
                    # fetch-and-markdown code here.
                    show_filepath(filepath)
                    reset_path()
            else:
                path_text.text("/".join(st.session_state.pathway))
                display_tree()

    if not st.session_state.translating:
        display_tree()
else:
    def search_kw():
        """List files whose path contains the keyword and show the chosen one.

        The keyword is read from ``st.session_state.kw``. At most the first
        15 matches are offered in the sidebar selectbox.
        """
        keyword = st.session_state.kw
        # regex=False: the keyword is user input and must be matched
        # literally; as a pattern, input such as "(" would raise re.error.
        # na=False keeps the boolean mask valid if a path is missing.
        matches = file_df.filepath.str.contains(keyword, regex=False, na=False)
        result = file_df[matches].reset_index(drop=True)
        if len(result) == 0:
            st.sidebar.write(f"尋之不得:{st.session_state.kw}")
        else:
            filepath = st.sidebar.selectbox("選一書名", options=list(result.head(15).filepath))
            show_filepath(filepath)

    def loading_with_search():
        """Read the search keyword from the sidebar and run the search."""
        kw = st.sidebar.text_input("書名求書 Search", value="楞伽经")
        st.session_state.kw = kw
        search_kw()

    if not st.session_state.translating:
        loading_with_search()
 
 
 
191
 
 
 
192
 
193
  def translate_text():
194
  st.session_state.translating = True
195
+ if c2.button("【曉文達義 Translate】"):
196
  if cc:
197
  if len(cc) > 168:
198
+ c2.write(f"句甚長 不得過百又六十八字 Sentence too long, should be less than 168 characters")
199
  else:
200
  c2.markdown(f"""```{inference(cc)}```""")
201
  else:
202
+ c2.write("【入難曉之文字 Please input sentence for translating】")
203
  st.session_state.translating = False
204
 
205
+ cc = c2.text_area("【入難曉之文字 Input sentence】", height=150)
206
  translate_text()
207