-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_dummy_data.py
More file actions
223 lines (185 loc) · 7.85 KB
/
create_dummy_data.py
File metadata and controls
223 lines (185 loc) · 7.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"""Create one dummy object per OGCR dynamic entity, driven by the spreadsheet.
Field values are taken from the spreadsheet `example` column (option 2), while
foreign-key fields are overwritten with the real id of the referenced object so
the dummy data is referentially consistent (option 1).
How it works:
1. Parse the spreadsheet (`parse_xlsx_entities`) to get each entity's fields,
declared types and example values.
2. Pre-compute a canonical id for every entity that owns a `<entity>_id` field
(taken from that field's example). Because all *_id fields are plain
strings (OBP does not enforce referential integrity here), these ids can be
assigned up front and reused as foreign keys regardless of creation order.
3. For each entity, build a payload from the example values, then override:
- its own `<entity>_id` -> the canonical id for this entity
- any `<other_entity>_id` field -> the canonical id of that other entity
- `compliance_certificate_id` -> certificate_of_compliance's id (alias)
4. POST one object per entity.
Usage:
python3 create_dummy_data.py [path/to/min_field_matrix.xlsx] [--token TOKEN]
"""
import argparse
import datetime
import json
import logging
import math
import re

import requests

from obp_client import token as default_token, obp_host
from parse_minimum_fields import parse_xlsx_entities
# Root logging configuration: INFO and above, rendered as
# "<timestamp> | <LEVEL, padded to 8> | <message>".
logging.basicConfig(
    format="%(asctime)s | %(levelname)-8s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

# Module-level logger shared by the functions in this script.
logger = logging.getLogger(__name__)
# Spreadsheet used when no path is given on the command line.
DEFAULT_SPREADSHEET = "min_field_matrix.xlsx"

# Root of every request URL; comes from obp_client.obp_host.
BASE_URL = obp_host
# Declared types that OBP does not treat as plain strings. A declared type that
# is not in this set is handled as "string".
NON_STRING_TYPES = {
    "integer",
    "number",
    "boolean",
    "json",
    "DATE_WITH_DAY",
}

# Foreign-key fields whose name is NOT `<entity>_id`, mapped to the entity they
# actually reference.
FK_ALIASES = {"compliance_certificate_id": "certificate_of_compliance"}

# Preferred creation order, parents before children. Entities missing from this
# list are appended afterwards in spreadsheet order. The order is cosmetic only
# because ids are pre-computed before anything is created.
PREFERRED_ORDER = [
    # Parties and land
    "operator",
    "land_manager",
    "parcel",
    # Certification and planning
    "certification_scheme",
    "certification_body",
    "monitoring_plan",
    "activity",
    "activity_plan",
    "certificate_of_compliance",
    # Verifications
    "parcel_owner_verification",
    "activity_verification",
    "activity_parcel_verification",
    "parcel_monitoring_period_verification",
    "activity_monitoring_period_verification",
    # Reporting
    "audit_report",
]
def print_separator(char="=", length=80):
    """Log a horizontal rule: ``char`` repeated ``length`` times, at INFO level."""
    rule = char * length
    logger.info(rule)
def clean_key(raw_key):
    """Return ``raw_key`` without the trailing ' (optional)' marker, if it has one."""
    marker = " (optional)"
    if not raw_key.endswith(marker):
        return raw_key
    # endswith() guarantees the key is at least as long as the marker.
    return raw_key[: len(raw_key) - len(marker)]
def coerce_value(field_meta):
    """Turn a parsed field's example/type into a POST-ready value.

    Mirrors the coercion used when building the entity definition so the value
    matches the schema property type OBP created.

    Args:
        field_meta: Either a dict holding the declared type under ``"value"``
            and a sample under ``"example"``, or a bare example value (which
            is then treated as a string field).

    Returns:
        A value matching the declared type:
        - ``integer``: an int; falls back to the first integer found in the
          example text, then to ``1``.
        - ``number``: a finite float; falls back to the first decimal number
          found in the example text, then to ``1.0``.
        - ``boolean``: ``True`` only when the example reads "true"
          (case-insensitive).
        - ``json``: the example itself if it is already a dict/list, otherwise
          the parsed JSON text, or ``{}`` when it cannot be parsed.
        - anything else (including ``DATE_WITH_DAY``): the example as text, or
          ``"sample"`` when the example is missing or empty.
    """
    if isinstance(field_meta, dict):
        declared = field_meta.get("value")
        example = field_meta.get("example")
    else:
        declared, example = None, field_meta
    if example is None:
        example = declared  # same fallback the definition builder uses

    prop_type = declared if declared in NON_STRING_TYPES else "string"

    # Normalise the raw example: trim whitespace and one pair of wrapping quotes.
    s = example
    if isinstance(s, str):
        s = s.strip()
        if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
            s = s[1:-1].strip()

    if prop_type == "integer":
        text = str(s)
        try:
            return int(text)
        except ValueError:
            found = re.search(r"-?\d+", text)
            return int(found.group()) if found else 1

    if prop_type == "number":
        text = str(s)
        try:
            number = float(text)
        except ValueError:
            # Salvage examples such as "12.5 kg", the same way the integer
            # branch salvages digits, instead of discarding them.
            found = re.search(r"-?\d+(?:\.\d+)?", text)
            number = float(found.group()) if found else 1.0
        # NaN / infinity are not valid JSON numbers, so they cannot be POSTed.
        return number if math.isfinite(number) else 1.0

    if prop_type == "boolean":
        return str(s).strip().lower() == "true"

    if prop_type == "json":
        if isinstance(s, (dict, list)):
            return s
        try:
            return json.loads(s)
        except (TypeError, ValueError):
            # TypeError: example is not str/bytes (e.g. None or a number).
            # ValueError: malformed JSON (JSONDecodeError is a subclass).
            return {}

    # DATE_WITH_DAY and string
    if prop_type == "DATE_WITH_DAY" and isinstance(s, datetime.datetime):
        # NOTE(review): presumably the spreadsheet parser can hand back
        # datetime objects for date cells - confirm. str() on those would add
        # a time-of-day component, so keep only the calendar date.
        return s.date().isoformat()
    return str(s) if s not in (None, "") else "sample"
def fk_target(entity_name, clean_field, entity_names):
    """Resolve which entity a `<x>_id` field points at.

    Returns None when the field does not end in ``_id`` or when it is
    ``entity_name``'s own primary id. Otherwise returns ``<x>`` if it is a
    known entity name, else whatever FK_ALIASES maps the field to (None when
    there is no alias).
    """
    suffix = "_id"
    if clean_field.endswith(suffix):
        stem = clean_field[: -len(suffix)]
        if stem != entity_name:  # the entity's own id is never a foreign key
            if stem in entity_names:
                return stem
            return FK_ALIASES.get(clean_field)
    return None
def build_canonical_ids(entities):
    """Map each entity name to the coerced value of its own `<entity>_id` field.

    Entities without such a field are left out of the result. When several raw
    keys clean to the same id field name, the first one in field order wins.
    """
    canonical = {}
    for ename, wrap in entities.items():
        own_id_field = f"{ename}_id"
        # Lazy: stops scanning as soon as the first matching field is consumed.
        id_metas = (
            meta
            for raw_key, meta in wrap.get("fields", {}).items()
            if clean_key(raw_key) == own_id_field
        )
        for meta in id_metas:
            canonical[ename] = coerce_value(meta)
            break
    return canonical
def build_payload(entity_name, wrap, canonical, entity_names):
    """Build the POST body for one entity from its spreadsheet fields.

    Each field key is cleaned of the ' (optional)' marker. The value is:
    - the entity's canonical id for its own `<entity>_id` field (or the coerced
      example when the entity has no canonical id);
    - the referenced entity's canonical id for a foreign-key field whose
      target has one;
    - the coerced example value otherwise.
    """
    own_id_field = f"{entity_name}_id"
    payload = {}
    for raw_key, meta in wrap.get("fields", {}).items():
        field = clean_key(raw_key)
        if field == own_id_field:
            value = canonical.get(entity_name, coerce_value(meta))
        else:
            referenced = fk_target(entity_name, field, entity_names)
            if referenced and referenced in canonical:
                value = canonical[referenced]
            else:
                value = coerce_value(meta)
        payload[field] = value
    return payload
def create_object(entity_name, data, token=None, timeout=30):
    """POST one object to the dynamic-entity endpoint for ``entity_name``.

    Args:
        entity_name: Dynamic entity name; appended to
            ``{BASE_URL}/obp/dynamic-entity/``.
        data: JSON-serialisable payload sent as the request body.
        token: Optional DirectLogin token. When falsy, no Authorization header
            is sent.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server would
            hang the whole run. Pass ``None`` to restore that behaviour.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx status.
        requests.exceptions.Timeout: No answer arrived within ``timeout``.
    """
    url = f"{BASE_URL}/obp/dynamic-entity/{entity_name}"
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"DirectLogin token={token}"

    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    response.raise_for_status()
    return response.json()
def main():
    """Parse the spreadsheet and POST one dummy object per entity.

    Reads the spreadsheet path and DirectLogin token from the command line,
    pre-computes the canonical ids, then creates the entities in
    PREFERRED_ORDER followed by any remaining ones in spreadsheet order. A
    failure on one entity is logged and does not stop the others.

    Returns:
        Process exit status: ``0`` when every entity was created, ``1`` when
        nothing could be parsed or at least one creation failed.
    """
    parser = argparse.ArgumentParser(
        description="Create dummy objects for the OGCR dynamic entities from the spreadsheet."
    )
    parser.add_argument(
        "file",
        nargs="?",
        default=DEFAULT_SPREADSHEET,
        help=f"Spreadsheet path (default: {DEFAULT_SPREADSHEET}).",
    )
    parser.add_argument(
        "--token",
        default=default_token,
        help="DirectLogin token (overrides obp_client.py).",
    )
    args = parser.parse_args()

    logger.info("Starting Dummy Data Creation Script")
    print_separator()

    entities = parse_xlsx_entities(args.file)
    if not entities:
        logger.error("No entities parsed from %s", args.file)
        return 1

    entity_names = set(entities)
    canonical = build_canonical_ids(entities)
    logger.info(
        "Parsed %d entities; %d have their own id field",
        len(entities),
        len(canonical),
    )

    # Order: preferred first, then any remaining in spreadsheet order.
    ordered = [n for n in PREFERRED_ORDER if n in entities]
    already_placed = set(ordered)
    ordered += [n for n in entities if n not in already_placed]
    total = len(ordered)

    print_separator("-")
    created = 0
    failed = 0
    for idx, ename in enumerate(ordered, 1):
        payload = build_payload(ename, entities[ename], canonical, entity_names)

        # Only the request itself is guarded: once the POST succeeded the
        # object exists, so trouble reading the response must not count as a
        # failed creation.
        try:
            resp = create_object(ename, payload, token=args.token)
        except requests.exceptions.HTTPError as e:
            detail = e.response.text if getattr(e, "response", None) is not None else str(e)
            logger.error(" ✗ [%d/%d] Failed %s: %s", idx, total, ename, detail)
            failed += 1
            continue
        except Exception as e:
            # Top-level boundary: keep going with the remaining entities.
            logger.error(" ✗ [%d/%d] Failed %s: %s", idx, total, ename, e)
            failed += 1
            continue

        # The object may be nested under the entity name or returned bare;
        # tolerate any other JSON shape (list, scalar) without raising.
        obj = resp.get(ename, resp) if isinstance(resp, dict) else None
        obj_id = obj.get(f"{ename}_id", "<auto>") if isinstance(obj, dict) else "<auto>"
        logger.info(" ✓ [%d/%d] Created %s (id: %s)", idx, total, ename, obj_id)
        created += 1

    print_separator("-")
    logger.info("Dummy Data Summary: %d created, %d failed", created, failed)
    print_separator("=")
    return 1 if failed else 0


if __name__ == "__main__":
    # Propagate main()'s status so callers and CI can detect failed runs.
    raise SystemExit(main())