# 3GPPIndexers / app.py
# Omar ID EL MOUMEN
# PoC push
# f74f129
import gradio as gr
import threading
import sys
import io
import time
from dotenv import load_dotenv
import os
from contextlib import redirect_stdout
from git import Repo
from huggingface_hub import Repository, HfApi
load_dotenv()
# == PATHS and SETTINGS ==
# Scripts executed by the two "Re-index" actions.
DOC_INDEXER = "indexer_multi.py"
SPEC_INDEXER = "spec_indexer_multi.py"

# JSON index files read for the counters and published after indexing.
DOC_INDEX_FILE = "indexed_docs.json"
SPEC_INDEX_FILE = "indexed_specifications.json"

# Git working tree used for commits (current working directory by default;
# may be replaced by an absolute path to the repo).
GIT_REPO_PATH = os.path.abspath(os.curdir)

# Target HuggingFace repository and its access token (set HF_TOKEN as an
# environment variable; None when it is not set).
HF_REPO_ID = "OrganizedProgrammers/3GPPDocFinder"
HF_TOKEN = os.getenv("HF_TOKEN")
# == Helpers ==
def run_python_module(module_path):
    """
    Run a Python script as ``__main__`` and yield everything it printed.

    The script is executed in-process with ``runpy.run_path`` while stdout is
    redirected to an in-memory buffer.  The captured text is yielded exactly
    once, after the script has finished (output is NOT streamed while the
    script runs).

    Args:
        module_path: Path of the script to execute.

    Yields:
        str: The complete captured stdout.  If the script raised an
        exception, or exited with a non-zero status, a trailing line
        describing the failure is included in the same text.
    """
    import runpy

    with io.StringIO() as buffer:
        with redirect_stdout(buffer):
            try:
                runpy.run_path(module_path, run_name="__main__")
            except SystemExit as exit_request:
                # A script ending with sys.exit() must not tear down the
                # caller; only report abnormal exit statuses.
                if exit_request.code not in (None, 0):
                    print(f"\n❌ Exited with status: {exit_request.code}")
            except Exception as e:
                # Printed while stdout is still redirected so the message
                # ends up in the captured log rather than on the console.
                print(f"\n❌ Error: {e}")
        captured = buffer.getvalue()
    # Yield outside of any try/finally so closing the generator early is safe.
    yield captured
def commit_and_push_github(files, message):
    """
    Stage *files*, commit them with *message*, then run ``git push``.

    The repository is opened at GIT_REPO_PATH.  A failing push is only
    reported on stdout; errors while staging or committing propagate to the
    caller.
    """
    git_repo = Repo(GIT_REPO_PATH)
    git_repo.git.add(files)
    git_repo.index.commit(message)

    try:
        git_repo.git.push()
    except Exception as push_error:
        # Best effort: the commit stays local when the push is rejected.
        print(f"Git push failed: {push_error}")
def commit_and_push_hf(files, message):
    """
    Copy *files* into the local clone of the HuggingFace repo and push them.

    The clone lives in the ``hf_spaces`` directory under GIT_REPO_PATH.  It is
    cloned from HF_REPO_ID when the directory does not exist yet, and pulled
    otherwise.

    Args:
        files: Paths of the files to publish.  Each file is copied to the
            same relative path inside the clone.
        message: Commit message.

    Returns:
        str: A short status message (push skipped, nothing to push, or
        pushed).
    """
    import shutil

    if not HF_TOKEN:
        return "No HF_TOKEN provided. Skipping HuggingFace push."

    hf_repo_dir = os.path.join(GIT_REPO_PATH, "hf_spaces")
    if os.path.exists(hf_repo_dir):
        repo = Repository(
            local_dir=hf_repo_dir,
            token=HF_TOKEN,
            skip_lfs_files=True,
        )
        repo.git_pull()
    else:
        repo = Repository(
            local_dir=hf_repo_dir,
            clone_from=HF_REPO_ID,
            token=HF_TOKEN,
            skip_lfs_files=True,
        )

    # Copy artifact files to huggingface space
    for source_path in files:
        destination = os.path.join(hf_repo_dir, source_path)
        # The clone may not contain the sub-directory of a nested path yet.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        shutil.copy2(source_path, destination)

    repo.git_add(auto_lfs_track=True)
    # git_commit raises when there is nothing staged (e.g. the index did not
    # change since the last run), so stop here instead of failing.
    if repo.is_repo_clean():
        return "No changes to push to HuggingFace."
    repo.git_commit(message)
    repo.git_push()
    return "Pushed to HuggingFace."
def _count_index_entries(index_path, section):
    """
    Return the number of entries stored under *section* in a JSON index file.

    Returns 0 when the file does not exist.  A file that cannot be read or
    parsed, or that lacks *section*, is reported on stdout and also counted
    as 0 so that a half-written or outdated index cannot break the UI.
    """
    import json

    try:
        with open(index_path, "r", encoding="utf-8") as index_file:
            entries = json.load(index_file)[section]
        return len(entries)
    except FileNotFoundError:
        # Nothing has been indexed yet.
        return 0
    except (OSError, ValueError, KeyError, TypeError) as error:
        # ValueError covers json.JSONDecodeError (malformed / partial file).
        print(f"Could not read '{section}' from {index_path}: {error!r}")
        return 0


def get_docs_stats():
    """Return the number of entries under "docs" in DOC_INDEX_FILE (0 if unavailable)."""
    return _count_index_entries(DOC_INDEX_FILE, "docs")


def get_specs_stats():
    """Return the number of entries under "specs" in SPEC_INDEX_FILE (0 if unavailable)."""
    return _count_index_entries(SPEC_INDEX_FILE, "specs")


def get_scopes_stats():
    """Return the number of entries under "scopes" in SPEC_INDEX_FILE (0 if unavailable)."""
    return _count_index_entries(SPEC_INDEX_FILE, "scopes")
# == Gradio Functions ==
def index_documents(progress=gr.Progress()):
    """
    Re-run the document indexer and publish the refreshed index file.

    Generator used as a Gradio event handler: every yielded string replaces
    the content of the log output.

    Args:
        progress: Gradio progress tracker, declared as a default argument so
            that Gradio supplies it when the handler runs.

    Yields:
        str: The indexer output, then the same output followed by a summary
        of the publishing step.
    """
    progress(0, desc="Starting document indexing…")
    log = ""
    for output in run_python_module(DOC_INDEXER):
        log = output
        progress(0.7, desc="Indexing in progress...")
        yield log
    commit_and_push_github([DOC_INDEX_FILE], "Update doc index via Gradio")
    # Keep the status text: the HuggingFace push may have been skipped.
    hf_status = commit_and_push_hf([DOC_INDEX_FILE], "Update doc index via Gradio")
    progress(1, desc="Done!")
    yield f"{log}\n\n✅ Finished! Committed to Git.\n{hf_status}"
def index_specifications(progress=gr.Progress()):
    """
    Re-run the specification indexer and publish the refreshed index file.

    Generator used as a Gradio event handler: every yielded string replaces
    the content of the log output.

    Args:
        progress: Gradio progress tracker, declared as a default argument so
            that Gradio supplies it when the handler runs.

    Yields:
        str: The indexer output, then the same output followed by a summary
        of the publishing step.
    """
    progress(0, desc="Starting specifications indexing…")
    log = ""
    for output in run_python_module(SPEC_INDEXER):
        log = output
        progress(0.7, desc="Indexing in progress...")
        yield log
    commit_and_push_github([SPEC_INDEX_FILE], "Update spec index via Gradio")
    # Keep the status text: the HuggingFace push may have been skipped.
    hf_status = commit_and_push_hf([SPEC_INDEX_FILE], "Update spec index via Gradio")
    progress(1, desc="Done!")
    yield f"{log}\n\n✅ Finished! Committed to Git.\n{hf_status}"
def refresh_stats():
    """Return the current docs, specs and scopes counts as a tuple of strings."""
    counters = (get_docs_stats, get_specs_stats, get_scopes_stats)
    return tuple(str(read_count()) for read_count in counters)
# == UI ==
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📄 3GPP Indexers")

    # One column per counter; the first two also carry their re-index button.
    with gr.Row():
        with gr.Column():
            doc_count = gr.Textbox(
                label="Docs Indexed",
                value=str(get_docs_stats()),
                interactive=False,
            )
            btn_docs = gr.Button("Re-index Documents", variant="primary")
        with gr.Column():
            spec_count = gr.Textbox(
                label="Specs Indexed",
                value=str(get_specs_stats()),
                interactive=False,
            )
            btn_specs = gr.Button("Re-index Specifications", variant="primary")
        with gr.Column():
            scope_count = gr.Textbox(
                label="Scopes Indexed",
                value=str(get_scopes_stats()),
                interactive=False,
            )

    # Shared log area for both indexers, plus a manual counter refresh.
    out = gr.Textbox(label="Output/Log", lines=13)
    refresh = gr.Button("🔄 Refresh Stats")

    # Event wiring.
    btn_docs.click(fn=index_documents, outputs=out)
    btn_specs.click(fn=index_specifications, outputs=out)
    refresh.click(fn=refresh_stats, outputs=[doc_count, spec_count, scope_count])

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()