update
This commit is contained in:
49
data_structure/add_id.py
Normal file
49
data_structure/add_id.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import json
|
||||
import uuid
|
||||
import re
|
||||
|
||||
# Read the source JSON document (UTF-8 text) that is to be re-keyed.
# The parsed result is iterated with enumerate() further down the script.
with open("structure.json", mode="r", encoding="utf-8") as source:
    data = json.load(source)
|
||||
|
||||
# Output container: "byId" maps each generated id to its item, and
# "allIds" records the generated ids in the order they were produced.
new_structure = dict(byId={}, allIds=[])
|
||||
|
||||
|
||||
# Function to fix punctuation in titles
|
||||
def fix_title_punctuation(title):
    """Normalize whitespace around semicolons and after hyphens in a title.

    Two substitutions are applied in order:

    1. Every semicolon, together with any whitespace on either side of it,
       becomes ``"; "``. A semicolon at the very end of the title therefore
       ends up followed by a single space.
    2. Where a hyphen directly follows a word character and is itself
       followed by whitespace, that whitespace is collapsed to one space.
       Hyphens with no whitespace after them (``"well-known"``) and hyphens
       preceded by a space (``"a - b"``) are left untouched.

    Args:
        title: The title string to clean up.

    Returns:
        The title with both substitutions applied.
    """
    # (pattern, replacement) pairs, applied one after another.
    rules = (
        (r"\s*;\s*", "; "),
        (r"(\w)-\s+", r"\1- "),
    )

    result = title
    for pattern, replacement in rules:
        result = re.sub(pattern, replacement, result)
    return result
|
||||
|
||||
|
||||
# Build one entry per item of the original array, keyed by a fresh UUID.
# The last index is computed once instead of on every iteration.
last_index = len(data) - 1
for i, item in enumerate(data):
    # A random UUID (as a string) identifies the entry in "byId"/"allIds".
    new_id = str(uuid.uuid4())

    # Work on a shallow copy so the keys added here are not written into
    # the item object that was loaded from the source file.
    new_item = item.copy()
    new_item["id"] = new_id
    new_item["index"] = i
    new_item["isFirst"] = i == 0
    new_item["isLast"] = i == last_index

    # "files" is handled as optional, the same way "title" is below, so an
    # item without it no longer aborts the whole run with a KeyError.
    # NOTE(review): str.replace rewrites every "pdf" substring in each
    # entry, not only a trailing extension - confirm that is intended.
    # Entries are assumed to be strings; verify against structure.json.
    if "files" in new_item:
        new_item["files"] = [f.replace("pdf", "html") for f in new_item["files"]]

    # Fix the title punctuation when the item carries a title.
    if "title" in new_item:
        new_item["title"] = fix_title_punctuation(new_item["title"])

    # Register the entry under its id and remember the insertion order.
    new_structure["byId"][new_id] = new_item
    new_structure["allIds"].append(new_id)
|
||||
|
||||
# Serialize the assembled structure to topics.json as UTF-8, keeping
# non-ASCII characters unescaped and indenting nested levels by 4 spaces.
with open("topics.json", mode="w", encoding="utf-8") as out:
    json.dump(new_structure, out, indent=4, ensure_ascii=False)

# Report completion on stdout.
print("topics.json created successfully with fixed punctuation")
|
||||
Reference in New Issue
Block a user