fix(ingest): openapi - add support for user, password auth (#4086)

This commit is contained in:
Vincenzo Lavorini 2022-02-25 08:29:01 +01:00 committed by GitHub
parent 02fe05eb8f
commit a113e4357e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 87 additions and 24 deletions

13
metadata-ingestion/source_docs/openapi.md Normal file → Executable file
View File

@ -30,7 +30,9 @@ source:
name: test_endpoint # this name will appear in DataHub
url: https://test_endpoint.com/
swagger_file: classicapi/doc/swagger.json # where to search for the OpenApi definitions
get_token: True # optional, if you need to get an authentication token beforehand
get_token: # optional, if you need to get an authentication token beforehand
request_type: get
url_complement: api/authentication/login?username={username}&password={password}
username: your_username # optional
password: your_password # optional
forced_examples: # optionals
@ -137,6 +139,15 @@ and this URL will be called to get back the needed metadata.
## Config details
### Token authentication
If this tool needs to get an access token to interrogate the endpoints, this can be requested. Two methods are available at the moment:
* 'get' : this requires the username/password combination to be present in the URL. Note that {username} and {password} are mandatory placeholders; they will be replaced with the real credentials at runtime. Because the username and password are sent in the request address, this method is insecure. If your provider supports the other method, please use it instead.
* 'post' : username and password will be inserted in the body of the POST request
In both cases, username and password are the ones defined in the configuration file.
### Getting dataset metadata from `forced_example`
Suppose you have an endpoint defined in the swagger file, but without example given, and the tool is

View File

@ -2,7 +2,7 @@ import logging
import time
import warnings
from abc import ABC
from typing import Dict, Generator, Iterable, Tuple
from typing import Dict, Generator, Iterable, Optional, Tuple
from datahub.configuration.common import ConfigModel
from datahub.emitter.mce_builder import make_tag_urn
@ -43,20 +43,46 @@ class OpenApiConfig(ConfigModel):
username: str = ""
password: str = ""
forced_examples: dict = {}
token: str = ""
get_token: bool = False
token: Optional[str] = None
get_token: dict = {}
def get_swagger(self) -> Dict:
if self.get_token: # token based authentication, to be tested
if self.token == "":
self.token = get_tok(
url=self.url, username=self.username, password=self.password
if self.get_token or self.token is not None:
if self.token is not None:
...
else:
assert (
"url_complement" in self.get_token.keys()
), "When 'request_type' is set to 'get', an url_complement is needed for the request."
if self.get_token["request_type"] == "get":
assert (
"{username}" in self.get_token["url_complement"]
), "we expect the keyword {username} to be present in the url"
assert (
"{password}" in self.get_token["url_complement"]
), "we expect the keyword {password} to be present in the url"
url4req = self.get_token["url_complement"].replace(
"{username}", self.username
)
url4req = url4req.replace("{password}", self.password)
elif self.get_token["request_type"] == "post":
url4req = self.get_token["url_complement"]
else:
raise KeyError(
"This tool accepts only 'get' and 'post' as method for getting tokens"
)
self.token = get_tok(
url=self.url,
username=self.username,
password=self.password,
tok_url=url4req,
method=self.get_token["request_type"],
)
sw_dict = get_swag_json(
self.url, token=self.token, swagger_file=self.swagger_file
) # load the swagger file
else:
else: # using basic auth for accessing endpoints
sw_dict = get_swag_json(
self.url,
username=self.username,
@ -102,7 +128,9 @@ class APISource(Source, ABC):
elif status_code == 504:
self.report.report_warning(key=key, reason="Timeout for reaching endpoint")
else:
raise Exception(f"Unable to retrieve endpoint, response code {status_code}")
raise Exception(
f"Unable to retrieve endpoint, response code {status_code}, key {key}"
)
def init_dataset(
self, endpoint_k: str, endpoint_dets: dict
@ -269,7 +297,7 @@ class APISource(Source, ABC):
class OpenApiSource(APISource):
def __init__(self, config: OpenApiConfig, ctx: PipelineContext):
super().__init__(config, ctx, "OpenApi")
super().__init__(config, ctx, "openapi")
@classmethod
def create(cls, config_dict, ctx):

View File

@ -172,7 +172,8 @@ def get_endpoints(sw_dict: dict) -> dict: # noqa: C901
if "parameters" in p_o["get"].keys():
url_details[p_k]["parameters"] = p_o["get"]["parameters"]
return url_details
ord_d = dict(sorted(url_details.items())) # sorting for convenience
return ord_d
def guessing_url_name(url: str, examples: dict) -> str:
@ -211,6 +212,10 @@ def guessing_url_name(url: str, examples: dict) -> str:
ex2use = root
elif root[:-1] in examples.keys():
ex2use = root[:-1]
elif root.replace("/", ".") in examples.keys():
ex2use = root.replace("/", ".")
elif root[:-1].replace("/", ".") in examples.keys():
ex2use = root[:-1].replace("/", ".")
else:
return url
@ -332,19 +337,38 @@ def extract_fields(
return [], {}
def get_tok(
    url: str,
    username: str = "",
    password: str = "",
    tok_url: str = "",
    method: str = "post",
) -> str:
    """
    Request an authentication token from the API.

    The request is sent to ``url + tok_url``.

    * ``method="post"``: ``username`` and ``password`` are sent as form data
      in the request body, and the token is read from
      ``["tokens"]["access"]`` of the JSON response.
    * ``method="get"``: a plain GET is issued to the composed URL (any
      credentials must already be part of ``tok_url``), and the token is
      read from ``["token"]`` of the JSON response.

    Raises:
        ValueError: if ``method`` is neither ``"post"`` nor ``"get"``.
        Exception: if the response is not a 200 or carries no token.
    """
    # Validate first so an unsupported method never triggers a network call.
    if method not in ("post", "get"):
        raise ValueError(f"Method unrecognised: {method}")

    url4req = url + tok_url
    if method == "post":
        data = {"username": username, "password": password}
        response = requests.post(url4req, data=data)
    else:
        response = requests.get(url4req)

    token = ""
    if response.status_code == 200:
        cont = json.loads(response.content)
        # Use .get so a payload without the expected keys ends up in the
        # explicit "Unable to get a valid token" error below rather than
        # surfacing as a bare KeyError.
        if method == "post":
            token = cont.get("tokens", {}).get("access", "")
        else:
            token = cont.get("token", "")

    if token:
        return token
    raise Exception(f"Unable to get a valid token: {response.text}")
def set_metadata(

View File

@ -3,7 +3,7 @@
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:OpenApi,test_openapi.root,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:openapi,test_openapi.root,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
@ -95,7 +95,7 @@
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:OpenApi,test_openapi.v2,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:openapi,test_openapi.v2,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {