vectorize_client.models.webcrawler_config
Vectorize API
API for Vectorize services (Beta)
The version of the OpenAPI document: 0.1.2 Generated by OpenAPI Generator (https://openapi-generator.tech)
Do not edit the class manually.
# coding: utf-8

"""
    Vectorize API

    API for Vectorize services (Beta)

    The version of the OpenAPI document: 0.1.2
    Generated by OpenAPI Generator (https://openapi-generator.tech)

    Do not edit the class manually.
"""  # noqa: E501


from __future__ import annotations
import pprint
import re  # noqa: F401
import json

from pydantic import BaseModel, ConfigDict, Field, StrictFloat, StrictInt, StrictStr
from typing import Any, ClassVar, Dict, List, Optional, Union
from typing import Optional, Set
from typing_extensions import Self


class WEBCRAWLERConfig(BaseModel):
    """
    Configuration for Web Crawler connector
    """ # noqa: E501
    # Every field is optional and is exposed under a hyphenated alias.
    allowed_domains_opt: Optional[List[StrictStr]] = Field(
        default=None,
        description="Additional Allowed URLs or prefix(es). Add one or more allowed URLs or URL prefixes. The crawler will read URLs that match these patterns in addition to the seed URL(s).. Example: (e.g. https://docs.example.com)",
        alias="allowed-domains-opt",
    )
    forbidden_paths: Optional[List[StrictStr]] = Field(
        default=None,
        description="Forbidden Paths. Example: Enter forbidden paths (e.g. /admin)",
        alias="forbidden-paths",
    )
    min_time_between_requests: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=500,
        description="Throttle (ms). Example: Enter minimum time between requests in milliseconds",
        alias="min-time-between-requests",
    )
    max_error_count: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=5,
        description="Max Error Count. Example: Enter maximum error count",
        alias="max-error-count",
    )
    max_urls: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=1000,
        description="Max URLs. Example: Enter maximum number of URLs to crawl",
        alias="max-urls",
    )
    max_depth: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=50,
        description="Max Depth. Example: Enter maximum crawl depth",
        alias="max-depth",
    )
    reindex_interval_seconds: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=3600,
        description="Reindex Interval (seconds). Example: Enter reindex interval in seconds",
        alias="reindex-interval-seconds",
    )
    __properties: ClassVar[List[str]] = [
        "allowed-domains-opt",
        "forbidden-paths",
        "min-time-between-requests",
        "max-error-count",
        "max-urls",
        "max-depth",
        "reindex-interval-seconds",
    ]

    model_config = ConfigDict(
        populate_by_name=True,
        validate_assignment=True,
        protected_namespaces=(),
    )

    def to_str(self) -> str:
        """Return a pretty-printed string of the model, keyed by alias."""
        aliased = self.model_dump(by_alias=True)
        return pprint.pformat(aliased)

    def to_json(self) -> str:
        """Return the model as a JSON string, keyed by alias."""
        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
        payload = self.to_dict()
        return json.dumps(payload)

    @classmethod
    def from_json(cls, json_str: str) -> Optional[Self]:
        """Create an instance of WEBCRAWLERConfig from a JSON string.

        The text is decoded with ``json.loads`` and the result is passed
        to ``from_dict``.
        """
        parsed = json.loads(json_str)
        return cls.from_dict(parsed)

    def to_dict(self) -> Dict[str, Any]:
        """Return the model as a dictionary keyed by alias.

        Compared with ``self.model_dump(by_alias=True)``, fields whose
        value is ``None`` are left out of the result. No field is
        explicitly excluded.
        """
        skipped: Set[str] = set()
        return self.model_dump(
            by_alias=True,
            exclude=skipped,
            exclude_none=True,
        )

    @classmethod
    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
        """Create an instance of WEBCRAWLERConfig from a dict.

        Returns ``None`` when ``obj`` is ``None``. Non-dict input is passed
        to ``model_validate`` unchanged. For a dict, only the alias keys
        are read; the five numeric settings fall back to their defaults
        when the key is missing or maps to ``None``.
        """
        if obj is None:
            return None

        if not isinstance(obj, dict):
            return cls.model_validate(obj)

        # The list-valued settings have no fallback: a missing key stays None.
        payload: Dict[str, Any] = {
            "allowed-domains-opt": obj.get("allowed-domains-opt"),
            "forbidden-paths": obj.get("forbidden-paths"),
        }
        # Numeric settings and the value used when the input gives none.
        fallbacks: Dict[str, Any] = {
            "min-time-between-requests": 500,
            "max-error-count": 5,
            "max-urls": 1000,
            "max-depth": 50,
            "reindex-interval-seconds": 3600,
        }
        for alias, fallback in fallbacks.items():
            supplied = obj.get(alias)
            payload[alias] = fallback if supplied is None else supplied
        return cls.model_validate(payload)
class
WEBCRAWLERConfig(pydantic.main.BaseModel):
class WEBCRAWLERConfig(BaseModel):
    """
    Configuration for Web Crawler connector
    """ # noqa: E501
    # Every field is optional and is exposed under a hyphenated alias.
    allowed_domains_opt: Optional[List[StrictStr]] = Field(
        default=None,
        description="Additional Allowed URLs or prefix(es). Add one or more allowed URLs or URL prefixes. The crawler will read URLs that match these patterns in addition to the seed URL(s).. Example: (e.g. https://docs.example.com)",
        alias="allowed-domains-opt",
    )
    forbidden_paths: Optional[List[StrictStr]] = Field(
        default=None,
        description="Forbidden Paths. Example: Enter forbidden paths (e.g. /admin)",
        alias="forbidden-paths",
    )
    min_time_between_requests: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=500,
        description="Throttle (ms). Example: Enter minimum time between requests in milliseconds",
        alias="min-time-between-requests",
    )
    max_error_count: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=5,
        description="Max Error Count. Example: Enter maximum error count",
        alias="max-error-count",
    )
    max_urls: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=1000,
        description="Max URLs. Example: Enter maximum number of URLs to crawl",
        alias="max-urls",
    )
    max_depth: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=50,
        description="Max Depth. Example: Enter maximum crawl depth",
        alias="max-depth",
    )
    reindex_interval_seconds: Optional[Union[StrictFloat, StrictInt]] = Field(
        default=3600,
        description="Reindex Interval (seconds). Example: Enter reindex interval in seconds",
        alias="reindex-interval-seconds",
    )
    __properties: ClassVar[List[str]] = [
        "allowed-domains-opt",
        "forbidden-paths",
        "min-time-between-requests",
        "max-error-count",
        "max-urls",
        "max-depth",
        "reindex-interval-seconds",
    ]

    model_config = ConfigDict(
        populate_by_name=True,
        validate_assignment=True,
        protected_namespaces=(),
    )

    def to_str(self) -> str:
        """Return a pretty-printed string of the model, keyed by alias."""
        aliased = self.model_dump(by_alias=True)
        return pprint.pformat(aliased)

    def to_json(self) -> str:
        """Return the model as a JSON string, keyed by alias."""
        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
        payload = self.to_dict()
        return json.dumps(payload)

    @classmethod
    def from_json(cls, json_str: str) -> Optional[Self]:
        """Create an instance of WEBCRAWLERConfig from a JSON string.

        The text is decoded with ``json.loads`` and the result is passed
        to ``from_dict``.
        """
        parsed = json.loads(json_str)
        return cls.from_dict(parsed)

    def to_dict(self) -> Dict[str, Any]:
        """Return the model as a dictionary keyed by alias.

        Compared with ``self.model_dump(by_alias=True)``, fields whose
        value is ``None`` are left out of the result. No field is
        explicitly excluded.
        """
        skipped: Set[str] = set()
        return self.model_dump(
            by_alias=True,
            exclude=skipped,
            exclude_none=True,
        )

    @classmethod
    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
        """Create an instance of WEBCRAWLERConfig from a dict.

        Returns ``None`` when ``obj`` is ``None``. Non-dict input is passed
        to ``model_validate`` unchanged. For a dict, only the alias keys
        are read; the five numeric settings fall back to their defaults
        when the key is missing or maps to ``None``.
        """
        if obj is None:
            return None

        if not isinstance(obj, dict):
            return cls.model_validate(obj)

        # The list-valued settings have no fallback: a missing key stays None.
        payload: Dict[str, Any] = {
            "allowed-domains-opt": obj.get("allowed-domains-opt"),
            "forbidden-paths": obj.get("forbidden-paths"),
        }
        # Numeric settings and the value used when the input gives none.
        fallbacks: Dict[str, Any] = {
            "min-time-between-requests": 500,
            "max-error-count": 5,
            "max-urls": 1000,
            "max-depth": 50,
            "reindex-interval-seconds": 3600,
        }
        for alias, fallback in fallbacks.items():
            supplied = obj.get(alias)
            payload[alias] = fallback if supplied is None else supplied
        return cls.model_validate(payload)
Configuration for Web Crawler connector
min_time_between_requests: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
max_error_count: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
max_urls: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
max_depth: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
reindex_interval_seconds: Union[Annotated[float, Strict(strict=True)], Annotated[int, Strict(strict=True)], NoneType]
model_config =
{'populate_by_name': True, 'validate_assignment': True, 'protected_namespaces': (), 'validate_by_alias': True, 'validate_by_name': True}
Configuration for the model; should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
def
to_str(self) -> str:
def to_str(self) -> str:
    """Return a pretty-printed string of the model, keyed by alias."""
    aliased = self.model_dump(by_alias=True)
    return pprint.pformat(aliased)
Returns the string representation of the model using alias
def
to_json(self) -> str:
def to_json(self) -> str:
    """Return the model as a JSON string, keyed by alias."""
    # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
    payload = self.to_dict()
    return json.dumps(payload)
Returns the JSON representation of the model using alias
@classmethod
def
from_json(cls, json_str: str) -> Optional[Self]:
@classmethod
def from_json(cls, json_str: str) -> Optional[Self]:
    """Create an instance of WEBCRAWLERConfig from a JSON string.

    The text is decoded with ``json.loads`` and the result is passed
    to ``from_dict``.
    """
    parsed = json.loads(json_str)
    return cls.from_dict(parsed)
Create an instance of WEBCRAWLERConfig from a JSON string
def
to_dict(self) -> Dict[str, Any]:
def to_dict(self) -> Dict[str, Any]:
    """Return the model as a dictionary keyed by alias.

    Compared with ``self.model_dump(by_alias=True)``, fields whose
    value is ``None`` are left out of the result. No field is
    explicitly excluded.
    """
    skipped: Set[str] = set()
    return self.model_dump(
        by_alias=True,
        exclude=skipped,
        exclude_none=True,
    )
Return the dictionary representation of the model using alias.
This has the following differences from calling pydantic's `self.model_dump(by_alias=True)`:
* `None` is only added to the output dict for nullable fields that were set at model initialization. Other fields with value `None` are ignored.
@classmethod
def
from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
@classmethod
def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
    """Create an instance of WEBCRAWLERConfig from a dict.

    Returns ``None`` when ``obj`` is ``None``. Non-dict input is passed
    to ``model_validate`` unchanged. For a dict, only the alias keys
    are read; the five numeric settings fall back to their defaults
    when the key is missing or maps to ``None``.
    """
    if obj is None:
        return None

    if not isinstance(obj, dict):
        return cls.model_validate(obj)

    # The list-valued settings have no fallback: a missing key stays None.
    payload: Dict[str, Any] = {
        "allowed-domains-opt": obj.get("allowed-domains-opt"),
        "forbidden-paths": obj.get("forbidden-paths"),
    }
    # Numeric settings and the value used when the input gives none.
    fallbacks: Dict[str, Any] = {
        "min-time-between-requests": 500,
        "max-error-count": 5,
        "max-urls": 1000,
        "max-depth": 50,
        "reindex-interval-seconds": 3600,
    }
    for alias, fallback in fallbacks.items():
        supplied = obj.get(alias)
        payload[alias] = fallback if supplied is None else supplied
    return cls.model_validate(payload)
Create an instance of WEBCRAWLERConfig from a dict