Spaces:
Runtime error
Runtime error
raynardj
committed on
Commit
·
e0a6c5a
1
Parent(s):
cf33f35
🪁 a decent version
Browse files
README.md
CHANGED
@@ -11,5 +11,5 @@ pinned: true
|
|
11 |
## 随无涯
|
12 |
> 强大的Transformer翻译 + 殆知阁的文言文库
|
13 |
|
14 |
-
* 朕亲自下厨的[🤗 翻译模型](https://
|
15 |
* 📚 书籍来自 [殆知阁](http://www.daizhige.org/),只为了便于展示翻译,喜欢请访问网站,书籍[github文件链接](https://github.com/garychowcmu/daizhigev20)
|
|
|
11 |
## 随无涯
|
12 |
> 强大的Transformer翻译 + 殆知阁的文言文库
|
13 |
|
14 |
+
* 朕亲自下厨的[🤗 翻译模型](https://huggingface.co/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 训练笔记](https://github.com/raynardj/yuan)
|
15 |
* 📚 书籍来自 [殆知阁](http://www.daizhige.org/),只为了便于展示翻译,喜欢请访问网站,书籍[github文件链接](https://github.com/garychowcmu/daizhigev20)
|
app.py
CHANGED
@@ -8,7 +8,6 @@ import torch
|
|
8 |
|
9 |
st.set_page_config(layout="wide")
|
10 |
|
11 |
-
|
12 |
@st.cache(allow_output_mutation=True)
|
13 |
def load_model():
|
14 |
from transformers import (
|
@@ -53,13 +52,15 @@ def get_file_df():
|
|
53 |
|
54 |
file_df = get_file_df()
|
55 |
|
56 |
-
st.sidebar.title("
|
57 |
st.sidebar.markdown("""
|
58 |
-
*
|
59 |
-
* 📚
|
60 |
""")
|
61 |
|
62 |
c2 = st.container()
|
|
|
|
|
63 |
c = st.container()
|
64 |
|
65 |
USER_ID = st.secrets["USER_ID"]
|
@@ -84,7 +85,7 @@ def show_file_size(size: int):
|
|
84 |
else:
|
85 |
return f"{size/1024//1024} MB"
|
86 |
|
87 |
-
|
88 |
def fetch_file(path):
|
89 |
# reading from local path first
|
90 |
if (Path("data")/path).exists():
|
@@ -104,6 +105,7 @@ def fetch_file(path):
|
|
104 |
r.raise_for_status()
|
105 |
|
106 |
|
|
|
107 |
def fetch_from_df(sub_paths: str = ""):
|
108 |
sub_df = file_df.copy()
|
109 |
for idx, step in enumerate(sub_paths):
|
@@ -112,71 +114,94 @@ def fetch_from_df(sub_paths: str = ""):
|
|
112 |
return None
|
113 |
return list(sub_df[f"col_{len(sub_paths)}"].unique())
|
114 |
|
|
|
|
|
|
|
|
|
115 |
|
116 |
-
|
117 |
-
if 'pathway' in st.session_state:
|
118 |
-
pass
|
119 |
-
else:
|
120 |
-
st.session_state.pathway = []
|
121 |
-
|
122 |
-
path_text = st.sidebar.text("/".join(st.session_state.pathway))
|
123 |
|
|
|
124 |
|
125 |
-
|
126 |
-
st.session_state
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
-
|
131 |
-
|
|
|
|
|
|
|
132 |
|
133 |
-
st.session_state.translating
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
-
def
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
st.session_state.pathway.append(dropdown)
|
140 |
-
if dropdown.endswith('.txt'):
|
141 |
-
filepath = "/".join(st.session_state.pathway)
|
142 |
-
file_size = file_size_map[filepath]
|
143 |
-
with st.spinner(f"loading file:{filepath},({show_file_size(file_size)})"):
|
144 |
-
# if file size is too large, we will not load it
|
145 |
-
if file_size > 3*1024*1024:
|
146 |
-
urlpath = filepath.replace(".txt", ".html")
|
147 |
-
dzg = f"http://www.daizhige.org/{urlpath}"
|
148 |
-
st.markdown(f"文件太大,[前往殆知阁页面]({dzg}), 或挑挑其他的书吧")
|
149 |
-
reset_path()
|
150 |
-
return None
|
151 |
-
path_text.text(filepath)
|
152 |
-
text = fetch_file(filepath)
|
153 |
-
# create markdown with max heights
|
154 |
-
c.markdown(
|
155 |
-
f"""<pre style='max-height:300px;overflow-y:auto'>{text}</pre>""", unsafe_allow_html=True
|
156 |
-
)
|
157 |
-
reset_path()
|
158 |
|
159 |
-
|
160 |
-
|
161 |
-
st.session_state.pathway)
|
162 |
-
path_text.text("/".join(st.session_state.pathway))
|
163 |
-
display_tree()
|
164 |
|
165 |
-
if st.session_state.translating == False:
|
166 |
-
display_tree()
|
167 |
|
168 |
def translate_text():
|
169 |
st.session_state.translating = True
|
170 |
-
if c2.button("
|
171 |
if cc:
|
172 |
if len(cc) > 168:
|
173 |
-
c2.write(f"
|
174 |
else:
|
175 |
c2.markdown(f"""```{inference(cc)}```""")
|
176 |
else:
|
177 |
-
c2.write("
|
178 |
st.session_state.translating = False
|
179 |
|
180 |
-
cc = c2.text_area("
|
181 |
translate_text()
|
182 |
|
|
|
8 |
|
9 |
st.set_page_config(layout="wide")
|
10 |
|
|
|
11 |
@st.cache(allow_output_mutation=True)
|
12 |
def load_model():
|
13 |
from transformers import (
|
|
|
52 |
|
53 |
file_df = get_file_df()
|
54 |
|
55 |
+
st.sidebar.title("【隨無涯】")
|
56 |
st.sidebar.markdown("""
|
57 |
+
* 朕自庖[🤗 模型](https://huggingface.co/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 訓習處](https://github.com/raynardj/yuan)
|
58 |
+
* 📚 充棟汗牛,取自[殆知閣](http://www.daizhige.org/),[github api](https://github.com/garychowcmu/daizhigev20)
|
59 |
""")
|
60 |
|
61 |
c2 = st.container()
|
62 |
+
c2.write("The entirety of ancient Chinese literature, with a modern translator at your side.")
|
63 |
+
st.markdown("""---""")
|
64 |
c = st.container()
|
65 |
|
66 |
USER_ID = st.secrets["USER_ID"]
|
|
|
85 |
else:
|
86 |
return f"{size/1024//1024} MB"
|
87 |
|
88 |
+
@st.cache(max_entries=100, allow_output_mutation=True)
|
89 |
def fetch_file(path):
|
90 |
# reading from local path first
|
91 |
if (Path("data")/path).exists():
|
|
|
105 |
r.raise_for_status()
|
106 |
|
107 |
|
108 |
+
@st.cache(allow_output_mutation=True, max_entries=100)
|
109 |
def fetch_from_df(sub_paths: str = ""):
|
110 |
sub_df = file_df.copy()
|
111 |
for idx, step in enumerate(sub_paths):
|
|
|
114 |
return None
|
115 |
return list(sub_df[f"col_{len(sub_paths)}"].unique())
|
116 |
|
117 |
+
def show_filepath(filepath: str):
    """Fetch the book at ``filepath`` and render it in the main container.

    The text comes from ``fetch_file`` and is written into container ``c``
    as a scrollable ``<pre>`` element capped at 300px height.

    Args:
        filepath: Path of the book file, passed straight to ``fetch_file``.
    """
    # Function-scope import so this block does not depend on the module header.
    import html

    text = fetch_file(filepath)
    # The markup is rendered with unsafe_allow_html=True, so the book text is
    # escaped first: a literal "<", ">" or "&" in the file would otherwise be
    # interpreted as HTML instead of being shown as text.
    safe_text = html.escape(str(text), quote=False)
    c.markdown(
        f"""<pre style='max-height:300px;overflow-y:auto'>{safe_text}</pre>""",
        unsafe_allow_html=True,
    )
|
121 |
|
122 |
+
if st.sidebar.selectbox(label="何以尋跡 How to search",options=["以類尋書 category","書名求書 search"])=="以類尋書 category":
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
+
st.session_state.translating = False
|
125 |
|
126 |
+
# root_data = fetch_from_github()
|
127 |
+
# Create the browsing path in session state only when it is not there yet,
# so an existing path survives Streamlit reruns.
if 'pathway' not in st.session_state:
    st.session_state.pathway = []
|
131 |
+
|
132 |
+
path_text = st.sidebar.text("/".join(st.session_state.pathway))
|
133 |
+
|
134 |
+
|
135 |
+
def reset_path():
    """Clear the browsing path and refresh the sidebar path display.

    Empties ``st.session_state.pathway`` and rewrites the ``path_text``
    sidebar element so it matches the now-empty path.
    """
    st.session_state.pathway = []
    # Every other call site renders the path as a "/"-joined string; do the
    # same here instead of handing the list object itself to ``text``.
    path_text.text("/".join(st.session_state.pathway))
|
138 |
+
|
139 |
+
|
140 |
+
if st.sidebar.button("還至初錄(back to root)"):
|
141 |
+
reset_path()
|
142 |
+
|
143 |
+
def display_tree():
    """Render one level of the category tree in the sidebar and descend.

    Looks up the entries below the current ``st.session_state.pathway`` with
    ``fetch_from_df``, shows them in a sidebar selectbox and appends the
    selected entry to the pathway. If the selection is a ``.txt`` file it is
    displayed (unless it is larger than 3 MiB, in which case a link to the
    daizhige.org page is shown instead) and the pathway is reset. Otherwise
    the function calls itself to render the next level.

    Returns:
        None in every case.
    """
    sublist = fetch_from_df(st.session_state.pathway)
    if not sublist:
        # fetch_from_df can return None (or an empty list) for a pathway it
        # cannot resolve; without options there is nothing to select, and
        # ``dropdown.endswith`` below would fail. Start over from the root.
        reset_path()
        return None

    dropdown = st.sidebar.selectbox("【擇書 choose】", options=sublist)
    with st.spinner("書非借不能讀也..."):
        st.session_state.pathway.append(dropdown)

        if not dropdown.endswith('.txt'):
            # Still on a directory level: show the path so far and recurse
            # to render the selectbox for the next level.
            path_text.text("/".join(st.session_state.pathway))
            display_tree()
            return None

        filepath = "/".join(st.session_state.pathway)
        file_size = file_size_map[filepath]
        with st.spinner(f"Load 載文:{filepath},({show_file_size(file_size)})"):
            # Files above 3 MiB are not loaded; link to the source site instead.
            if file_size > 3*1024*1024:
                urlpath = filepath.replace(".txt", ".html")
                dzg = f"http://www.daizhige.org/{urlpath}"
                st.markdown(f"File too big 其文碩而難載,不能為之,[往 殆知閣]({dzg}), 或擇他書")
                reset_path()
                return None
            path_text.text(filepath)
            # Reuse the shared helper, which renders the text in container
            # ``c`` as a height-limited <pre> block.
            show_filepath(filepath)
            reset_path()
    return None
|
172 |
|
173 |
+
if st.session_state.translating == False:
|
174 |
+
display_tree()
|
175 |
+
else:
|
176 |
+
def search_kw():
    """Search book paths for the keyword in session state and show a match.

    Filters ``file_df`` to rows whose ``filepath`` contains
    ``st.session_state.kw``. When nothing matches, a "not found" note is
    written to the sidebar; otherwise the first 15 matches are offered in a
    sidebar selectbox and the selected file is displayed with
    ``show_filepath``.
    """
    kw = st.session_state.kw
    # regex=False: the keyword is free text typed by the user, so it must be
    # matched literally; as a regex, input such as "(" would raise.
    # na=False: rows without a path simply do not match.
    matches = file_df.filepath.str.contains(kw, regex=False, na=False)
    result = file_df[matches].reset_index(drop=True)

    if result.empty:
        st.sidebar.write(f"尋之不得:{kw}")
        return

    filepath = st.sidebar.selectbox("選一書名", options=list(result.head(15).filepath))
    show_filepath(filepath)
|
183 |
|
184 |
+
def loading_with_search():
    """Read the search keyword from the sidebar and run the search.

    The value of the sidebar text input (pre-filled with a default title) is
    stored in ``st.session_state.kw`` and ``search_kw`` is then called.
    """
    st.session_state.kw = st.sidebar.text_input(
        "書名求書 Search",
        value="楞伽经",
    )
    search_kw()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
+
if st.session_state.translating == False:
|
190 |
+
loading_with_search()
|
|
|
|
|
|
|
191 |
|
|
|
|
|
192 |
|
193 |
def translate_text():
    """Render the translate button and, when clicked, translate the input.

    Sets ``st.session_state.translating`` while running. On a button click
    the text in ``cc`` is checked: empty input and input longer than 168
    characters each produce a message in container ``c2``; anything else is
    passed to ``inference`` and the result is written to ``c2``.
    """
    st.session_state.translating = True
    try:
        if not c2.button("【曉文達義 Translate】"):
            return
        if not cc:
            c2.write("【入難曉之文字 Please input sentence for translating】")
        elif len(cc) > 168:
            c2.write("句甚長 不得過百又六十八字 Sentence too long, should be less than 168 characters")
        else:
            c2.markdown(f"""```{inference(cc)}```""")
    finally:
        # Always clear the flag, even if inference raises; session state
        # persists across reruns, so a stale True would otherwise stick.
        st.session_state.translating = False
|
204 |
|
205 |
+
cc = c2.text_area("【入難曉之文字 Input sentence】", height=150)
|
206 |
translate_text()
|
207 |
|