vectorize_client.models.webcrawler_config

Vectorize API

API for Vectorize services (Beta)

The version of the OpenAPI document: 0.1.2
Generated by OpenAPI Generator (https://openapi-generator.tech)

Do not edit the class manually.

# coding: utf-8

"""
    Vectorize API

    API for Vectorize services (Beta)

    The version of the OpenAPI document: 0.1.2
    Generated by OpenAPI Generator (https://openapi-generator.tech)

    Do not edit the class manually.
"""  # noqa: E501


from __future__ import annotations
import pprint
import re  # noqa: F401
import json

from pydantic import BaseModel, ConfigDict, Field, StrictFloat, StrictInt, StrictStr
from typing import Any, ClassVar, Dict, List, Optional, Union
from typing import Optional, Set
from typing_extensions import Self

class WEBCRAWLERConfig(BaseModel):
    """
    Configuration for Web Crawler connector
    """ # noqa: E501
    allowed_domains_opt: Optional[List[StrictStr]] = Field(default=None, description="Additional Allowed URLs or prefix(es). Add one or more allowed URLs or URL prefixes. The crawler will read URLs that match these patterns in addition to the seed URL(s). Example: (e.g. https://docs.example.com)", alias="allowed-domains-opt")
    forbidden_paths: Optional[List[StrictStr]] = Field(default=None, description="Forbidden Paths. Example: Enter forbidden paths (e.g. /admin)", alias="forbidden-paths")
    min_time_between_requests: Optional[Union[StrictFloat, StrictInt]] = Field(default=500, description="Throttle (ms). Example: Enter minimum time between requests in milliseconds", alias="min-time-between-requests")
    max_error_count: Optional[Union[StrictFloat, StrictInt]] = Field(default=5, description="Max Error Count. Example: Enter maximum error count", alias="max-error-count")
    max_urls: Optional[Union[StrictFloat, StrictInt]] = Field(default=1000, description="Max URLs. Example: Enter maximum number of URLs to crawl", alias="max-urls")
    max_depth: Optional[Union[StrictFloat, StrictInt]] = Field(default=50, description="Max Depth. Example: Enter maximum crawl depth", alias="max-depth")
    reindex_interval_seconds: Optional[Union[StrictFloat, StrictInt]] = Field(default=3600, description="Reindex Interval (seconds). Example: Enter reindex interval in seconds", alias="reindex-interval-seconds")
    __properties: ClassVar[List[str]] = ["allowed-domains-opt", "forbidden-paths", "min-time-between-requests", "max-error-count", "max-urls", "max-depth", "reindex-interval-seconds"]

    model_config = ConfigDict(
        populate_by_name=True,
        validate_assignment=True,
        protected_namespaces=(),
    )


    def to_str(self) -> str:
        """Returns the string representation of the model using alias"""
        return pprint.pformat(self.model_dump(by_alias=True))

    def to_json(self) -> str:
        """Returns the JSON representation of the model using alias"""
        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
        return json.dumps(self.to_dict())

    @classmethod
    def from_json(cls, json_str: str) -> Optional[Self]:
        """Create an instance of WEBCRAWLERConfig from a JSON string"""
        return cls.from_dict(json.loads(json_str))

    def to_dict(self) -> Dict[str, Any]:
        """Return the dictionary representation of the model using alias.

        This has the following differences from calling pydantic's
        `self.model_dump(by_alias=True)`:

        * `None` is only added to the output dict for nullable fields that
          were set at model initialization. Other fields with value `None`
          are ignored.
        """
        excluded_fields: Set[str] = set([
        ])

        _dict = self.model_dump(
            by_alias=True,
            exclude=excluded_fields,
            exclude_none=True,
        )
        return _dict

    @classmethod
    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
        """Create an instance of WEBCRAWLERConfig from a dict"""
        if obj is None:
            return None

        if not isinstance(obj, dict):
            return cls.model_validate(obj)

        _obj = cls.model_validate({
            "allowed-domains-opt": obj.get("allowed-domains-opt"),
            "forbidden-paths": obj.get("forbidden-paths"),
            "min-time-between-requests": obj.get("min-time-between-requests") if obj.get("min-time-between-requests") is not None else 500,
            "max-error-count": obj.get("max-error-count") if obj.get("max-error-count") is not None else 5,
            "max-urls": obj.get("max-urls") if obj.get("max-urls") is not None else 1000,
            "max-depth": obj.get("max-depth") if obj.get("max-depth") is not None else 50,
            "reindex-interval-seconds": obj.get("reindex-interval-seconds") if obj.get("reindex-interval-seconds") is not None else 3600
        })
        return _obj
class WEBCRAWLERConfig(pydantic.main.BaseModel):

Configuration for Web Crawler connector

allowed_domains_opt: Optional[List[Annotated[str, Strict(strict=True)]]]
forbidden_paths: Optional[List[Annotated[str, Strict(strict=True)]]]
min_time_between_requests: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
max_error_count: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
max_urls: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
max_depth: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
reindex_interval_seconds: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
model_config = {'populate_by_name': True, 'validate_assignment': True, 'protected_namespaces': (), 'validate_by_alias': True, 'validate_by_name': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
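
Because `populate_by_name=True` is set, an instance can be built either with the Python field names or with the hyphenated API aliases. A minimal usage sketch (the values are illustrative, not recommendations):

from vectorize_client.models.webcrawler_config import WEBCRAWLERConfig

# Construct with Python field names
config = WEBCRAWLERConfig(
    allowed_domains_opt=["https://docs.example.com"],
    forbidden_paths=["/admin"],
    max_urls=200,
)

# Or validate an API-style payload that uses the hyphenated aliases
payload = {"allowed-domains-opt": ["https://docs.example.com"], "max-depth": 10}
from_payload = WEBCRAWLERConfig.model_validate(payload)

# Fields that are not supplied keep their declared defaults
assert config.max_depth == 50
assert from_payload.max_urls == 1000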

def to_str(self) -> str:

Returns the string representation of the model using alias
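
For example (output shape sketched from the fields above; `pprint` sorts the keys):

cfg = WEBCRAWLERConfig(max_urls=200)
print(cfg.to_str())
# Roughly:
# {'allowed-domains-opt': None,
#  'forbidden-paths': None,
#  'max-depth': 50,
#  'max-error-count': 5,
#  'max-urls': 200,
#  'min-time-between-requests': 500,
#  'reindex-interval-seconds': 3600}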

def to_json(self) -> str:

Returns the JSON representation of the model using alias
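
Since `to_json()` serializes `to_dict()`, fields left as `None` are dropped and the hyphenated aliases become the JSON keys. A sketch:

cfg = WEBCRAWLERConfig()
print(cfg.to_json())
# Roughly (shown wrapped here):
# {"min-time-between-requests": 500, "max-error-count": 5, "max-urls": 1000,
#  "max-depth": 50, "reindex-interval-seconds": 3600}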

@classmethod
def from_json(cls, json_str: str) -> Optional[Self]:

Create an instance of WEBCRAWLERConfig from a JSON string
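
Keys that are missing from the JSON fall back to the declared defaults, for example:

cfg = WEBCRAWLERConfig.from_json('{"max-urls": 200, "forbidden-paths": ["/admin"]}')
assert cfg.max_urls == 200
assert cfg.forbidden_paths == ["/admin"]
assert cfg.max_depth == 50  # not in the JSON, so the default is applied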

def to_dict(self) -> Dict[str, Any]:

Return the dictionary representation of the model using alias.

This has the following differences from calling pydantic's self.model_dump(by_alias=True):

  • None is only added to the output dict for nullable fields that were set at model initialization. Other fields with value None are ignored.
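
A short sketch of that behaviour (the domain value is illustrative):

cfg = WEBCRAWLERConfig(allowed_domains_opt=["https://docs.example.com"])
cfg.to_dict()
# Roughly:
# {'allowed-domains-opt': ['https://docs.example.com'],
#  'min-time-between-requests': 500, 'max-error-count': 5, 'max-urls': 1000,
#  'max-depth': 50, 'reindex-interval-seconds': 3600}
# 'forbidden-paths' is None here and is therefore omitted.
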
@classmethod
def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:

Create an instance of WEBCRAWLERConfig from a dict
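
As with `from_json`, missing or `None` entries are replaced by the field defaults, and a `None` input returns `None`:

cfg = WEBCRAWLERConfig.from_dict({
    "allowed-domains-opt": ["https://docs.example.com"],
    "min-time-between-requests": None,
})
assert cfg.min_time_between_requests == 500  # None replaced by the default
assert cfg.reindex_interval_seconds == 3600  # missing key, default applied
assert WEBCRAWLERConfig.from_dict(None) is None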