Add CliConfig class to wrap both BaseConfig and CliMixin (#1957)

### Description
Add a new class, `CliConfig`, that combines `BaseConfig` and `CliMixin`, so CLI config classes can be typed against a single base class:
```python
class CliConfig(BaseConfig, CliMixin):
    pass
```
This commit is contained in:
Roman Isecke 2023-11-02 17:18:40 -04:00 committed by GitHub
parent 901704b6c0
commit b58d0dde3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 70 additions and 84 deletions

View File

@ -1,4 +1,4 @@
## 0.10.29-dev9 ## 0.10.29-dev10
### Enhancements ### Enhancements

View File

@ -1 +1 @@
__version__ = "0.10.29-dev9" # pragma: no cover __version__ = "0.10.29-dev10" # pragma: no cover

View File

@ -2,7 +2,7 @@ import typing as t
from abc import ABC from abc import ABC
from dataclasses import dataclass, field from dataclasses import dataclass, field
from unstructured.ingest.cli.interfaces import CliMixin from unstructured.ingest.cli.interfaces import CliConfig
from unstructured.ingest.interfaces import BaseConfig from unstructured.ingest.interfaces import BaseConfig
@ -10,7 +10,7 @@ from unstructured.ingest.interfaces import BaseConfig
class BaseCmd(ABC): class BaseCmd(ABC):
cmd_name: str cmd_name: str
cli_config: t.Optional[t.Type[BaseConfig]] = None cli_config: t.Optional[t.Type[BaseConfig]] = None
additional_cli_options: t.List[t.Type[CliMixin]] = field(default_factory=list) additional_cli_options: t.List[t.Type[CliConfig]] = field(default_factory=list)
addition_configs: t.Dict[str, t.Type[BaseConfig]] = field(default_factory=dict) addition_configs: t.Dict[str, t.Type[BaseConfig]] = field(default_factory=dict)
is_fsspec: bool = False is_fsspec: bool = False

View File

@ -5,13 +5,12 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class AirtableCliConfig(BaseConfig, CliMixin): class AirtableCliConfig(CliConfig):
personal_access_token: t.Optional[str] = None personal_access_token: t.Optional[str] = None
@staticmethod @staticmethod

View File

@ -5,15 +5,14 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
CMD_NAME = "azure" CMD_NAME = "azure"
@dataclass @dataclass
class AzureCliConfig(BaseConfig, CliMixin): class AzureCliConfig(CliConfig):
account_id: t.Optional[str] = None account_id: t.Optional[str] = None
account_name: t.Optional[str] = None account_name: t.Optional[str] = None
connection_string: t.Optional[str] = None connection_string: t.Optional[str] = None

View File

@ -4,13 +4,12 @@ from dataclasses import dataclass
import click import click
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class AzureCognitiveSearchCliWriteConfig(BaseConfig, CliMixin): class AzureCognitiveSearchCliWriteConfig(CliConfig):
key: str key: str
endpoint: str endpoint: str
index: str index: str

View File

@ -5,13 +5,12 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class BiomedCliConfig(BaseConfig, CliMixin): class BiomedCliConfig(CliConfig):
api_id: t.Optional[str] = None api_id: t.Optional[str] = None
api_from: t.Optional[str] = None api_from: t.Optional[str] = None
api_until: t.Optional[str] = None api_until: t.Optional[str] = None

View File

@ -5,15 +5,14 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
CMD_NAME = "box" CMD_NAME = "box"
@dataclass @dataclass
class BoxCliConfig(BaseConfig, CliMixin): class BoxCliConfig(CliConfig):
box_app_config: t.Optional[str] = None box_app_config: t.Optional[str] = None
@staticmethod @staticmethod

View File

@ -5,14 +5,13 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
DelimitedString, DelimitedString,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class ConfluenceCliConfig(BaseConfig, CliMixin): class ConfluenceCliConfig(CliConfig):
api_token: str api_token: str
url: str url: str
user_email: str user_email: str

View File

@ -5,15 +5,14 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
CMD_NAME = "delta-table" CMD_NAME = "delta-table"
@dataclass @dataclass
class DeltaTableCliConfig(BaseConfig, CliMixin): class DeltaTableCliConfig(CliConfig):
table_uri: str table_uri: str
version: t.Optional[int] = None version: t.Optional[int] = None
storage_options: t.Optional[str] = None storage_options: t.Optional[str] = None
@ -51,7 +50,7 @@ class DeltaTableCliConfig(BaseConfig, CliMixin):
@dataclass @dataclass
class DeltaTableCliWriteConfig(BaseConfig, CliMixin): class DeltaTableCliWriteConfig(CliConfig):
write_column: str write_column: str
mode: t.Literal["error", "append", "overwrite", "ignore"] = "error" mode: t.Literal["error", "append", "overwrite", "ignore"] = "error"

View File

@ -5,14 +5,13 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
DelimitedString, DelimitedString,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class DiscordCliConfig(BaseConfig, CliMixin): class DiscordCliConfig(CliConfig):
channels: t.List[str] channels: t.List[str]
token: str token: str
period: t.Optional[int] = None period: t.Optional[int] = None

View File

@ -5,15 +5,14 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
CMD_NAME = "dropbox" CMD_NAME = "dropbox"
@dataclass @dataclass
class DropboxCliConfig(BaseConfig, CliMixin): class DropboxCliConfig(CliConfig):
token: str token: str
@staticmethod @staticmethod

View File

@ -5,13 +5,12 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class ElasticsearchCliConfig(BaseConfig, CliMixin): class ElasticsearchCliConfig(CliConfig):
index_name: str index_name: str
url: str url: str
jq_query: t.Optional[str] = None jq_query: t.Optional[str] = None

View File

@ -4,14 +4,16 @@ from dataclasses import dataclass
import click import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import CliMixin, FileOrJson from unstructured.ingest.cli.interfaces import (
from unstructured.ingest.interfaces import BaseConfig CliConfig,
FileOrJson,
)
CMD_NAME = "gcs" CMD_NAME = "gcs"
@dataclass @dataclass
class GcsCliConfig(BaseConfig, CliMixin): class GcsCliConfig(CliConfig):
service_account_key: t.Optional[t.Union[dict, str]] = None service_account_key: t.Optional[t.Union[dict, str]] = None
@staticmethod @staticmethod

View File

@ -5,13 +5,12 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class GithubCliConfig(BaseConfig, CliMixin): class GithubCliConfig(CliConfig):
url: str url: str
git_access_token: t.Optional[str] = None git_access_token: t.Optional[str] = None
git_branch: t.Optional[str] = None git_branch: t.Optional[str] = None

View File

@ -5,13 +5,12 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class GitlabCliConfig(BaseConfig, CliMixin): class GitlabCliConfig(CliConfig):
url: str url: str
git_access_token: t.Optional[str] = None git_access_token: t.Optional[str] = None
git_branch: t.Optional[str] = None git_branch: t.Optional[str] = None

View File

@ -4,12 +4,15 @@ from dataclasses import dataclass
import click import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import CliMixin, CliRecursiveConfig, FileOrJson from unstructured.ingest.cli.interfaces import (
from unstructured.ingest.interfaces import BaseConfig CliConfig,
CliRecursiveConfig,
FileOrJson,
)
@dataclass @dataclass
class GoogleDriveCliConfig(BaseConfig, CliMixin): class GoogleDriveCliConfig(CliConfig):
drive_id: str drive_id: str
service_account_key: t.Union[dict, str] service_account_key: t.Union[dict, str]
extension: t.Optional[str] = None extension: t.Optional[str] = None

View File

@ -5,14 +5,13 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
DelimitedString, DelimitedString,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class JiraCliConfig(BaseConfig, CliMixin): class JiraCliConfig(CliConfig):
api_token: str api_token: str
url: str url: str
user_email: str user_email: str

View File

@ -5,14 +5,13 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
CliRecursiveConfig, CliRecursiveConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class LocalCliConfig(BaseConfig, CliMixin): class LocalCliConfig(CliConfig):
input_path: str input_path: str
file_glob: t.Optional[str] = None file_glob: t.Optional[str] = None

View File

@ -5,15 +5,14 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
CliRecursiveConfig, CliRecursiveConfig,
DelimitedString, DelimitedString,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class NotionCliConfig(BaseConfig, CliMixin): class NotionCliConfig(CliConfig):
notion_api_key: str notion_api_key: str
page_ids: t.Optional[t.List[str]] page_ids: t.Optional[t.List[str]]
database_ids: t.Optional[t.List[str]] database_ids: t.Optional[t.List[str]]

View File

@ -5,14 +5,13 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
CliRecursiveConfig, CliRecursiveConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class OnedriveCliConfig(BaseConfig, CliMixin): class OnedriveCliConfig(CliConfig):
client_id: str client_id: str
client_cred: str client_cred: str
user_pname: str user_pname: str

View File

@ -5,15 +5,14 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
CliRecursiveConfig, CliRecursiveConfig,
DelimitedString, DelimitedString,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class OutlookCliConfig(BaseConfig, CliMixin): class OutlookCliConfig(CliConfig):
client_id: str client_id: str
user_email: str user_email: str
tenant: t.Optional[str] = "common" tenant: t.Optional[str] = "common"

View File

@ -5,13 +5,12 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class RedditCliConfig(BaseConfig, CliMixin): class RedditCliConfig(CliConfig):
client_id: str client_id: str
client_secret: str client_secret: str
subreddit_name: str subreddit_name: str

View File

@ -5,15 +5,14 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
CMD_NAME = "s3" CMD_NAME = "s3"
@dataclass @dataclass
class S3CliConfig(BaseConfig, CliMixin): class S3CliConfig(CliConfig):
anonymous: bool = False anonymous: bool = False
endpoint_url: t.Optional[str] = None endpoint_url: t.Optional[str] = None

View File

@ -5,15 +5,14 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
CliRecursiveConfig, CliRecursiveConfig,
DelimitedString, DelimitedString,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class SalesforceCliConfig(BaseConfig, CliMixin): class SalesforceCliConfig(CliConfig):
username: str username: str
consumer_key: str consumer_key: str
private_key_path: str private_key_path: str

View File

@ -5,14 +5,13 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
CliRecursiveConfig, CliRecursiveConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class SharepointCliConfig(BaseConfig, CliMixin): class SharepointCliConfig(CliConfig):
client_id: t.Optional[str] = None client_id: t.Optional[str] = None
client_cred: t.Optional[str] = None client_cred: t.Optional[str] = None
site: t.Optional[str] = None site: t.Optional[str] = None

View File

@ -5,14 +5,13 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
DelimitedString, DelimitedString,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class SlackCliConfig(BaseConfig, CliMixin): class SlackCliConfig(CliConfig):
token: str token: str
channels: t.List[str] channels: t.List[str]
start_date: t.Optional[str] = None start_date: t.Optional[str] = None

View File

@ -5,13 +5,12 @@ import click
from unstructured.ingest.cli.base.src import BaseSrcCmd from unstructured.ingest.cli.base.src import BaseSrcCmd
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliMixin, CliConfig,
) )
from unstructured.ingest.interfaces import BaseConfig
@dataclass @dataclass
class WikipediaCliConfig(BaseConfig, CliMixin): class WikipediaCliConfig(CliConfig):
page_title: str page_title: str
auto_suggest: bool = True auto_suggest: bool = True

View File

@ -108,6 +108,10 @@ class CliMixin:
cmd.params.append(param) cmd.params.append(param)
class CliConfig(BaseConfig, CliMixin):
pass
class CliRetryStrategyConfig(RetryStrategyConfig, CliMixin): class CliRetryStrategyConfig(RetryStrategyConfig, CliMixin):
@staticmethod @staticmethod
def get_cli_options() -> t.List[click.Option]: def get_cli_options() -> t.List[click.Option]:
@ -319,7 +323,7 @@ class CliPartitionConfig(PartitionConfig, CliMixin):
return options return options
class CliRecursiveConfig(BaseConfig, CliMixin): class CliRecursiveConfig(CliConfig):
recursive: bool recursive: bool
@staticmethod @staticmethod

View File

@ -5,8 +5,8 @@ import click
from unstructured.ingest.cli.interfaces import ( from unstructured.ingest.cli.interfaces import (
CliChunkingConfig, CliChunkingConfig,
CliConfig,
CliEmbeddingConfig, CliEmbeddingConfig,
CliMixin,
CliPartitionConfig, CliPartitionConfig,
CliPermissionsConfig, CliPermissionsConfig,
CliProcessorConfig, CliProcessorConfig,
@ -51,8 +51,8 @@ def extract_configs(
return res return res
def add_options(cmd: click.Command, extras=t.List[t.Type[CliMixin]], is_src=True) -> click.Command: def add_options(cmd: click.Command, extras=t.List[t.Type[CliConfig]], is_src=True) -> click.Command:
configs: t.List[t.Type[CliMixin]] = ( configs: t.List[t.Type[CliConfig]] = (
[ [
CliPartitionConfig, CliPartitionConfig,
CliReadConfig, CliReadConfig,