mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-20 23:28:01 +00:00
fix(ingest): openapi - add support for user, password auth (#4086)
This commit is contained in:
parent
02fe05eb8f
commit
a113e4357e
13
metadata-ingestion/source_docs/openapi.md
Normal file → Executable file
13
metadata-ingestion/source_docs/openapi.md
Normal file → Executable file
@ -30,7 +30,9 @@ source:
|
||||
name: test_endpoint # this name will appear in DataHub
|
||||
url: https://test_endpoint.com/
|
||||
swagger_file: classicapi/doc/swagger.json # where to search for the OpenApi definitions
|
||||
get_token: True # optional, if you need to get an authentication token beforehand
|
||||
get_token: # optional, if you need to get an authentication token beforehand
|
||||
request_type: get
|
||||
url_complement: api/authentication/login?username={username}&password={password}
|
||||
username: your_username # optional
|
||||
password: your_password # optional
|
||||
forced_examples: # optional
|
||||
@ -137,6 +139,15 @@ and this URL will be called to get back the needed metadata.
|
||||
|
||||
## Config details
|
||||
|
||||
### Token authentication
|
||||
|
||||
If this tool needs to get an access token to interrogate the endpoints, this can be requested. Two methods are available at the moment:
|
||||
|
||||
* 'get' : this requires the username/password combination to be present in the URL. Note that {username} and {password} are mandatory placeholders; they will be replaced with the actual credentials at runtime. Because the username and password are sent in the request address, this method is insecure. If your provider supports the other method, prefer it.
|
||||
* 'post' : username and password will be inserted in the body of the POST request
|
||||
|
||||
In both cases, username and password are the ones defined in the configuration file.
|
||||
|
||||
### Getting dataset metadata from `forced_example`
|
||||
|
||||
Suppose you have an endpoint defined in the swagger file, but without example given, and the tool is
|
||||
|
48
metadata-ingestion/src/datahub/ingestion/source/openapi.py
Normal file → Executable file
48
metadata-ingestion/src/datahub/ingestion/source/openapi.py
Normal file → Executable file
@ -2,7 +2,7 @@ import logging
|
||||
import time
|
||||
import warnings
|
||||
from abc import ABC
|
||||
from typing import Dict, Generator, Iterable, Tuple
|
||||
from typing import Dict, Generator, Iterable, Optional, Tuple
|
||||
|
||||
from datahub.configuration.common import ConfigModel
|
||||
from datahub.emitter.mce_builder import make_tag_urn
|
||||
@ -43,20 +43,46 @@ class OpenApiConfig(ConfigModel):
|
||||
username: str = ""
|
||||
password: str = ""
|
||||
forced_examples: dict = {}
|
||||
token: str = ""
|
||||
get_token: bool = False
|
||||
token: Optional[str] = None
|
||||
get_token: dict = {}
|
||||
|
||||
def get_swagger(self) -> Dict:
|
||||
if self.get_token: # token based authentication, to be tested
|
||||
if self.token == "":
|
||||
if self.get_token or self.token is not None:
|
||||
if self.token is not None:
|
||||
...
|
||||
else:
|
||||
assert (
|
||||
"url_complement" in self.get_token.keys()
|
||||
), "When 'request_type' is set to 'get', an url_complement is needed for the request."
|
||||
if self.get_token["request_type"] == "get":
|
||||
assert (
|
||||
"{username}" in self.get_token["url_complement"]
|
||||
), "we expect the keyword {username} to be present in the url"
|
||||
assert (
|
||||
"{password}" in self.get_token["url_complement"]
|
||||
), "we expect the keyword {password} to be present in the url"
|
||||
url4req = self.get_token["url_complement"].replace(
|
||||
"{username}", self.username
|
||||
)
|
||||
url4req = url4req.replace("{password}", self.password)
|
||||
elif self.get_token["request_type"] == "post":
|
||||
url4req = self.get_token["url_complement"]
|
||||
else:
|
||||
raise KeyError(
|
||||
"This tool accepts only 'get' and 'post' as method for getting tokens"
|
||||
)
|
||||
self.token = get_tok(
|
||||
url=self.url, username=self.username, password=self.password
|
||||
url=self.url,
|
||||
username=self.username,
|
||||
password=self.password,
|
||||
tok_url=url4req,
|
||||
method=self.get_token["request_type"],
|
||||
)
|
||||
|
||||
sw_dict = get_swag_json(
|
||||
self.url, token=self.token, swagger_file=self.swagger_file
|
||||
) # load the swagger file
|
||||
else:
|
||||
|
||||
else: # using basic auth for accessing endpoints
|
||||
sw_dict = get_swag_json(
|
||||
self.url,
|
||||
username=self.username,
|
||||
@ -102,7 +128,9 @@ class APISource(Source, ABC):
|
||||
elif status_code == 504:
|
||||
self.report.report_warning(key=key, reason="Timeout for reaching endpoint")
|
||||
else:
|
||||
raise Exception(f"Unable to retrieve endpoint, response code {status_code}")
|
||||
raise Exception(
|
||||
f"Unable to retrieve endpoint, response code {status_code}, key {key}"
|
||||
)
|
||||
|
||||
def init_dataset(
|
||||
self, endpoint_k: str, endpoint_dets: dict
|
||||
@ -269,7 +297,7 @@ class APISource(Source, ABC):
|
||||
|
||||
class OpenApiSource(APISource):
|
||||
def __init__(self, config: OpenApiConfig, ctx: PipelineContext):
|
||||
super().__init__(config, ctx, "OpenApi")
|
||||
super().__init__(config, ctx, "openapi")
|
||||
|
||||
@classmethod
|
||||
def create(cls, config_dict, ctx):
|
||||
|
44
metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py
Normal file → Executable file
44
metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py
Normal file → Executable file
@ -172,7 +172,8 @@ def get_endpoints(sw_dict: dict) -> dict: # noqa: C901
|
||||
if "parameters" in p_o["get"].keys():
|
||||
url_details[p_k]["parameters"] = p_o["get"]["parameters"]
|
||||
|
||||
return url_details
|
||||
ord_d = dict(sorted(url_details.items())) # sorting for convenience
|
||||
return ord_d
|
||||
|
||||
|
||||
def guessing_url_name(url: str, examples: dict) -> str:
|
||||
@ -211,6 +212,10 @@ def guessing_url_name(url: str, examples: dict) -> str:
|
||||
ex2use = root
|
||||
elif root[:-1] in examples.keys():
|
||||
ex2use = root[:-1]
|
||||
elif root.replace("/", ".") in examples.keys():
|
||||
ex2use = root.replace("/", ".")
|
||||
elif root[:-1].replace("/", ".") in examples.keys():
|
||||
ex2use = root[:-1].replace("/", ".")
|
||||
else:
|
||||
return url
|
||||
|
||||
@ -332,19 +337,38 @@ def extract_fields(
|
||||
return [], {}
|
||||
|
||||
|
||||
def get_tok(url: str, username: str = "", password: str = "") -> str:
|
||||
def get_tok(
|
||||
url: str,
|
||||
username: str = "",
|
||||
password: str = "",
|
||||
tok_url: str = "",
|
||||
method: str = "post",
|
||||
) -> str:
|
||||
"""
|
||||
Trying to post username/password to get auth.
|
||||
Simplified version: it expect a POST at api/authenticate
|
||||
"""
|
||||
data = {"username": username, "password": password}
|
||||
url2post = url + "api/authenticate/"
|
||||
response = requests.post(url2post, data=data)
|
||||
if response.status_code == 200:
|
||||
cont = json.loads(response.content)
|
||||
return cont["tokens"]["access"]
|
||||
token = ""
|
||||
url4req = url + tok_url
|
||||
if method == "post":
|
||||
# this will make a POST call with username and password
|
||||
data = {"username": username, "password": password}
|
||||
# url2post = url + "api/authenticate/"
|
||||
response = requests.post(url4req, data=data)
|
||||
if response.status_code == 200:
|
||||
cont = json.loads(response.content)
|
||||
token = cont["tokens"]["access"]
|
||||
elif method == "get":
|
||||
# this will make a GET call with username and password
|
||||
response = requests.get(url4req)
|
||||
if response.status_code == 200:
|
||||
cont = json.loads(response.content)
|
||||
token = cont["token"]
|
||||
else:
|
||||
raise Exception("Unable to get a valid token")
|
||||
raise ValueError(f"Method unrecognised: {method}")
|
||||
if token != "":
|
||||
return token
|
||||
else:
|
||||
raise Exception(f"Unable to get a valid token: {response.text}")
|
||||
|
||||
|
||||
def set_metadata(
|
||||
|
6
metadata-ingestion/tests/integration/openapi/openapi_mces_golden.json
Normal file → Executable file
6
metadata-ingestion/tests/integration/openapi/openapi_mces_golden.json
Normal file → Executable file
@ -3,7 +3,7 @@
|
||||
"auditHeader": null,
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:OpenApi,test_openapi.root,PROD)",
|
||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:openapi,test_openapi.root,PROD)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||
@ -95,7 +95,7 @@
|
||||
"auditHeader": null,
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:OpenApi,test_openapi.v2,PROD)",
|
||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:openapi,test_openapi.v2,PROD)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||
@ -183,4 +183,4 @@
|
||||
"properties": null
|
||||
}
|
||||
}
|
||||
]
|
||||
]
|
||||
|
Loading…
x
Reference in New Issue
Block a user