Spaces:
Runtime error
Runtime error
import json | |
import uuid | |
from langchain.text_splitter import CharacterTextSplitter | |
# Module-level splitter shared by this file: newline separator,
# chunk_size of 3000 and no overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=3000, chunk_overlap=0)
def generate_uuid():
    """Return a freshly generated random (version 4) UUID as a string."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)
def check_id_extis_in_json(file_id):
    """Report whether ``file_id`` is present in ``file_ids.json``.

    The file is addressed by a relative path, so it is resolved against the
    current working directory, and it is re-read on every call.

    Args:
        file_id: Identifier to look up.

    Returns:
        ``True`` if ``file_id`` is contained in the loaded JSON value (an
        element when the file holds an array, a key when it holds an
        object), otherwise ``False``.

    Raises:
        FileNotFoundError: If ``file_ids.json`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open('file_ids.json', 'r', encoding='utf-8') as f:
        file_ids = json.load(f)
    return file_id in file_ids
def compare_paper_ids(data, paper_ids):
    """Return the entries of ``paper_ids`` that have no match in ``data``.

    Each item of ``data`` is indexed with ``'doi_no'``; the collected values
    form the set of known identifiers. The result keeps the order (and any
    duplicates) of ``paper_ids``.
    """
    known_dois = set()
    for record in data:
        known_dois.add(record['doi_no'])

    absent = []
    for candidate in paper_ids:
        if candidate in known_dois:
            continue
        absent.append(candidate)
    return absent
def extract_json_from_text(text):
    """Extract the ``data`` entry of a JSON object embedded in ``text``.

    The input is converted with ``str()``; the span running from the first
    ``{`` to the last ``}`` is lower-cased and parsed as JSON.

    NOTE: the lower-casing is applied to the whole span before parsing, so
    both keys and string values of the result are lower case.

    Args:
        text: Any object; its string form is searched for a JSON object.

    Returns:
        The value stored under ``"data"`` in the parsed object, or ``[]``
        when that key is absent. If the span cannot be parsed, the error is
        printed in red and the stringified input is returned unchanged.
    """
    text = str(text)
    try:
        # Slice from the first '{' through the last '}'. If either brace is
        # missing the slice is not valid JSON and the fallback below applies.
        start_index = text.find('{')
        end_index = text.rfind('}') + 1
        json_part = json.loads(text[start_index:end_index].lower())
        print("json", type(json_part))
        print(json_part)
        return json_part.get('data', [])
    except Exception as e:
        # Best-effort: report the problem and hand the raw text back.
        # "\033[31m" switches to red; "\033[0m" resets the colour afterwards.
        print(f"\033[31m Exception occurred while loading JSON: {e} \033[0m")
        return text