-
Notifications
You must be signed in to change notification settings - Fork 631
feat(data-collection): foundation — option, resolution, accessors #6676
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,12 @@ | |
| VERSION, | ||
| ClientConstructor, | ||
| ) | ||
| from sentry_sdk.data_collection import ( | ||
| OFF_DATA_COLLECTION, | ||
| DataCollection, | ||
| _map_from_send_default_pii, | ||
| resolve_data_collection, | ||
| ) | ||
| from sentry_sdk.envelope import Envelope, Item, PayloadRef | ||
| from sentry_sdk.integrations import _DEFAULT_INTEGRATIONS, setup_integrations | ||
| from sentry_sdk.integrations.dedupe import DedupeIntegration | ||
|
|
@@ -345,11 +351,11 @@ def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]": | |
| if rv["enable_tracing"] is True and rv["traces_sample_rate"] is None: | ||
| rv["traces_sample_rate"] = 1.0 | ||
|
|
||
| rv["data_collection"] = resolve_data_collection(rv) | ||
|
|
||
| if rv["event_scrubber"] is None: | ||
| rv["event_scrubber"] = EventScrubber( | ||
| send_default_pii=( | ||
| False if rv["send_default_pii"] is None else rv["send_default_pii"] | ||
| ) | ||
| send_default_pii=rv["data_collection"].user_info | ||
| ) | ||
|
|
||
| if rv["socket_options"] and not isinstance(rv["socket_options"], list): | ||
|
|
@@ -425,6 +431,23 @@ def parsed_dsn(self) -> "Optional[Dsn]": | |
| def should_send_default_pii(self) -> bool: | ||
| return False | ||
|
|
||
| @property | ||
| def data_collection(self) -> "DataCollection": | ||
| return OFF_DATA_COLLECTION | ||
|
|
||
| def should_collect_user_info(self) -> bool: | ||
| return False | ||
|
|
||
| def should_collect_gen_ai_inputs( | ||
| self, include_prompts: "Optional[bool]" = None | ||
| ) -> bool: | ||
| return False | ||
|
|
||
| def should_collect_gen_ai_outputs( | ||
| self, include_prompts: "Optional[bool]" = None | ||
| ) -> bool: | ||
| return False | ||
|
Comment on lines
+435
to
+449
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To reduce the API surface on the client, I think we should only expose the Edit: alternatively, we could introduce helper methods similar to what we do for streamed spans ( |
||
|
|
||
| def is_active(self) -> bool: | ||
| """ | ||
| .. versionadded:: 2.0.0 | ||
|
|
@@ -614,6 +637,17 @@ def _record_lost_event( | |
| self.options["error_sampler"] = sample_all | ||
| self.options["traces_sampler"] = sample_all | ||
| self.options["profiles_sampler"] = sample_all | ||
| # data_collection was resolved in _get_options() before this | ||
| # spotlight override flipped send_default_pii on. Re-derive it so | ||
| # the should_collect_* accessors agree with should_send_default_pii() | ||
| # in DSN-less spotlight mode (only when the user did not set | ||
| # data_collection explicitly). | ||
| if not self.options["data_collection"].explicit: | ||
| self.options["data_collection"] = _map_from_send_default_pii( | ||
| True, | ||
| self.options["include_local_variables"] is not False, | ||
| self.options["include_source_context"] is not False, | ||
|
Comment on lines
+647
to
+649
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to another spot I mentioned this on, this is not easy to follow, should pass this in as named args, not as positional args |
||
| ) | ||
|
|
||
| self.session_flusher = SessionFlusher(capture_func=_capture_envelope) | ||
|
|
||
|
|
@@ -724,6 +758,59 @@ def should_send_default_pii(self) -> bool: | |
| """ | ||
| return self.options.get("send_default_pii") or False | ||
|
|
||
| @property | ||
| def data_collection(self) -> "DataCollection": | ||
| """ | ||
| Returns the resolved :class:`~sentry_sdk.data_collection.DataCollection` | ||
| config for this client. | ||
| """ | ||
| dc = self.options.get("data_collection") | ||
| return dc if dc is not None else OFF_DATA_COLLECTION | ||
|
|
||
| def should_collect_user_info(self) -> bool: | ||
| """ | ||
| Returns whether the SDK should automatically populate ``user.*`` fields | ||
| (id, email, username, ip_address) from instrumentation. | ||
| """ | ||
| return bool(self.data_collection.user_info) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Edit: should this, similar to |
||
|
|
||
| def should_collect_gen_ai_inputs( | ||
| self, include_prompts: "Optional[bool]" = None | ||
| ) -> bool: | ||
| """ | ||
| Returns whether the SDK should collect generative AI input content. | ||
|
|
||
| ``include_prompts`` is the integration-level override (if set, it takes | ||
| precedence over the global ``data_collection.gen_ai.inputs`` setting). | ||
| """ | ||
| return self._should_collect_gen_ai_content("inputs", include_prompts) | ||
|
|
||
| def should_collect_gen_ai_outputs( | ||
| self, include_prompts: "Optional[bool]" = None | ||
| ) -> bool: | ||
| """ | ||
| Returns whether the SDK should collect generative AI output content. | ||
|
|
||
| ``include_prompts`` is the integration-level override (if set, it takes | ||
| precedence over the global ``data_collection.gen_ai.outputs`` setting). | ||
| """ | ||
| return self._should_collect_gen_ai_content("outputs", include_prompts) | ||
|
|
||
| def _should_collect_gen_ai_content( | ||
| self, direction: str, include_prompts: "Optional[bool]" | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should try to lock down the possible values of |
||
| ) -> bool: | ||
| dc = self.data_collection | ||
| if dc.explicit: | ||
| # Integration-level override wins over the global gen_ai setting. | ||
| if include_prompts is not None: | ||
| return include_prompts | ||
| return bool(getattr(dc.gen_ai, direction)) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We also shouldn't need to cast to |
||
| # Legacy (data_collection not set): preserve the historical gate | ||
| # `should_send_default_pii() and integration.include_prompts`. | ||
| # `include_prompts is None` means "no integration-level override", which | ||
| # falls back to the legacy default of True (collect when PII is on). | ||
| return self.should_send_default_pii() and (include_prompts is not False) | ||
|
|
||
| @property | ||
| def dsn(self) -> "Optional[str]": | ||
| """Returns the configured DSN as string.""" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This would need to be removed