Spaces:
Runtime error
Runtime error
File size: 2,701 Bytes
d83dcd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import os
import streamlit as st
import common
import os
import pickle
import easyocr
from log import logger
from pathlib import Path
from llama_index import Document
# Page bootstrap: run the project's login check, read the storage locations
# from the environment, and render the title and the image uploader.
# NOTE(review): common.check_login() is defined in the project's `common`
# module; presumably it blocks unauthenticated sessions - confirm there.
common.check_login()

# Both variables are mandatory: a missing one raises KeyError right here.
# INDEX_NAME is later used as the index persist directory and PKL_NAME as the
# pickle file that stores the list of imported file names.
INDEX_NAME = os.environ["INDEX_NAME"]
PKL_NAME = os.environ["PKL_NAME"]

# The uploader widget is keyed by this counter. It is incremented after a
# successful import, which gives the widget a new key on the next run
# (presumably to clear the previous selection).
if "file_uploader_key" not in st.session_state:
    st.session_state["file_uploader_key"] = 0

st.title("📝 ImportImageFileEasyOcr")
uploaded_file = st.file_uploader(
    "Upload an article",
    type=("png", "jpg", "jpeg"),
    key=st.session_state["file_uploader_key"],
)
# Handle a click on the "import" button: write the uploaded image to a
# temporary file, OCR it with EasyOCR, insert the recognised text into the
# index, persist the index and the imported-file list, then rerun the page.
if st.button("import", use_container_width=True):
    if uploaded_file is None:
        # st.file_uploader yields None until a file is chosen; without this
        # guard the click crashed with AttributeError on `uploaded_file.name`.
        st.warning("Please upload an image file before importing.")
    else:
        # basename() strips any directory part from the client-supplied name.
        filename = os.path.basename(uploaded_file.name)
        filepath = os.path.join('documents', filename)
        try:
            # Make sure the scratch directory exists before writing into it.
            os.makedirs('documents', exist_ok=True)
            with open(filepath, 'wb') as f:
                f.write(uploaded_file.getvalue())
            logger.info(filepath)

            # this needs to run only once to load the model into memory
            # NOTE(review): the reader is currently rebuilt on every click;
            # consider caching it if import latency matters.
            reader = easyocr.Reader(['ja', 'en'], gpu=False)
            result = reader.readtext(filepath, detail=0, paragraph=True)
            text = ''.join(result)

            # Earlier pytesseract-based approach, kept for reference:
            # Set the path of the image file to read
            # IMG_FILE_PATH = "ocrtest.jpg"
            # Set the language-data path in an environment variable
            # tessdata_dir = "D:\project\stylez\chatGPT\llamaindex-streamlit\llm-examples-main\traindata"
            # os.environ["TESSDATA_PREFIX"] = tessdata_dir
            # Open the image file
            # image = Image.open(filepath)
            # Convert the image to RGB mode
            # image = image.convert('RGB')
            # Extract string data from the image
            # logger.info("image")
            # text = pytesseract.image_to_string(image, lang='jpn')

            logger.info(text)
            document = Document(text=text)
            logger.info(document)
            document.metadata = {'filename': filename}

            st.session_state.index.insert(document=document)
            # Record the name only after the insert succeeded, so a failed
            # import no longer leaves a phantom entry in the displayed list.
            st.session_state.stored_docs.append(uploaded_file.name)
            logger.info(st.session_state.stored_docs)

            st.session_state.index.storage_context.persist(persist_dir=INDEX_NAME)
            common.setChatEngine()
            with open(PKL_NAME, "wb") as f:
                print("pickle")
                pickle.dump(st.session_state.stored_docs, f)
        except Exception as e:
            # Top-level UI boundary: log the failure and surface it to the
            # user instead of failing silently.
            logger.error(e)
            st.error(f"Failed to import {filename}: {e}")
        else:
            # Success: bump the uploader key so the widget gets a fresh key,
            # then rerun. Kept outside the `try` body so the rerun signal is
            # never routed through the broad exception handler.
            st.session_state["file_uploader_key"] += 1
            st.experimental_rerun()
        finally:
            # cleanup temp file (on success and on failure alike)
            if os.path.exists(filepath):
                os.remove(filepath)
# Show the names of the documents imported so far. The list is rendered only
# when "stored_docs" is present in the session state.
st.subheader("Import File List")
if "stored_docs" in st.session_state:
    imported_names = st.session_state.stored_docs
    logger.info(imported_names)
    for imported_name in imported_names:
        st.write(imported_name)
|