mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-05 23:23:42 +00:00
feat(ingest/openapi): support proxies and alternate auth schemes (#9492)
Co-authored-by: Fernando Marino <f.marino@rheagroup.com> Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
This commit is contained in:
parent
cfb4d2f95f
commit
b7a0bbcb3d
@ -52,6 +52,13 @@ class OpenApiConfig(ConfigModel):
|
|||||||
ignore_endpoints: list = Field(default=[], description="")
|
ignore_endpoints: list = Field(default=[], description="")
|
||||||
username: str = Field(default="", description="")
|
username: str = Field(default="", description="")
|
||||||
password: str = Field(default="", description="")
|
password: str = Field(default="", description="")
|
||||||
|
proxies: Optional[dict] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Eg. "
|
||||||
|
"`{'http': 'http://10.10.1.10:3128', 'https': 'http://10.10.1.10:1080'}`."
|
||||||
|
"If authentication is required, add it to the proxy url directly e.g. "
|
||||||
|
"`http://user:pass@10.10.1.10:3128/`.",
|
||||||
|
)
|
||||||
forced_examples: dict = Field(default={}, description="")
|
forced_examples: dict = Field(default={}, description="")
|
||||||
token: Optional[str] = Field(default=None, description="")
|
token: Optional[str] = Field(default=None, description="")
|
||||||
get_token: dict = Field(default={}, description="")
|
get_token: dict = Field(default={}, description="")
|
||||||
@ -87,9 +94,13 @@ class OpenApiConfig(ConfigModel):
|
|||||||
password=self.password,
|
password=self.password,
|
||||||
tok_url=url4req,
|
tok_url=url4req,
|
||||||
method=self.get_token["request_type"],
|
method=self.get_token["request_type"],
|
||||||
|
proxies=self.proxies,
|
||||||
)
|
)
|
||||||
sw_dict = get_swag_json(
|
sw_dict = get_swag_json(
|
||||||
self.url, token=self.token, swagger_file=self.swagger_file
|
self.url,
|
||||||
|
token=self.token,
|
||||||
|
swagger_file=self.swagger_file,
|
||||||
|
proxies=self.proxies,
|
||||||
) # load the swagger file
|
) # load the swagger file
|
||||||
|
|
||||||
else: # using basic auth for accessing endpoints
|
else: # using basic auth for accessing endpoints
|
||||||
@ -98,6 +109,7 @@ class OpenApiConfig(ConfigModel):
|
|||||||
username=self.username,
|
username=self.username,
|
||||||
password=self.password,
|
password=self.password,
|
||||||
swagger_file=self.swagger_file,
|
swagger_file=self.swagger_file,
|
||||||
|
proxies=self.proxies,
|
||||||
)
|
)
|
||||||
return sw_dict
|
return sw_dict
|
||||||
|
|
||||||
@ -258,10 +270,15 @@ class APISource(Source, ABC):
|
|||||||
tot_url = clean_url(config.url + self.url_basepath + endpoint_k)
|
tot_url = clean_url(config.url + self.url_basepath + endpoint_k)
|
||||||
|
|
||||||
if config.token:
|
if config.token:
|
||||||
response = request_call(tot_url, token=config.token)
|
response = request_call(
|
||||||
|
tot_url, token=config.token, proxies=config.proxies
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
response = request_call(
|
response = request_call(
|
||||||
tot_url, username=config.username, password=config.password
|
tot_url,
|
||||||
|
username=config.username,
|
||||||
|
password=config.password,
|
||||||
|
proxies=config.proxies,
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
fields2add, root_dataset_samples[dataset_name] = extract_fields(
|
fields2add, root_dataset_samples[dataset_name] = extract_fields(
|
||||||
@ -281,10 +298,15 @@ class APISource(Source, ABC):
|
|||||||
url_guess = try_guessing(endpoint_k, root_dataset_samples)
|
url_guess = try_guessing(endpoint_k, root_dataset_samples)
|
||||||
tot_url = clean_url(config.url + self.url_basepath + url_guess)
|
tot_url = clean_url(config.url + self.url_basepath + url_guess)
|
||||||
if config.token:
|
if config.token:
|
||||||
response = request_call(tot_url, token=config.token)
|
response = request_call(
|
||||||
|
tot_url, token=config.token, proxies=config.proxies
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
response = request_call(
|
response = request_call(
|
||||||
tot_url, username=config.username, password=config.password
|
tot_url,
|
||||||
|
username=config.username,
|
||||||
|
password=config.password,
|
||||||
|
proxies=config.proxies,
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
fields2add, _ = extract_fields(response, dataset_name)
|
fields2add, _ = extract_fields(response, dataset_name)
|
||||||
@ -304,10 +326,15 @@ class APISource(Source, ABC):
|
|||||||
)
|
)
|
||||||
tot_url = clean_url(config.url + self.url_basepath + composed_url)
|
tot_url = clean_url(config.url + self.url_basepath + composed_url)
|
||||||
if config.token:
|
if config.token:
|
||||||
response = request_call(tot_url, token=config.token)
|
response = request_call(
|
||||||
|
tot_url, token=config.token, proxies=config.proxies
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
response = request_call(
|
response = request_call(
|
||||||
tot_url, username=config.username, password=config.password
|
tot_url,
|
||||||
|
username=config.username,
|
||||||
|
password=config.password,
|
||||||
|
proxies=config.proxies,
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
fields2add, _ = extract_fields(response, dataset_name)
|
fields2add, _ = extract_fields(response, dataset_name)
|
||||||
|
|||||||
@ -51,6 +51,7 @@ def request_call(
|
|||||||
token: Optional[str] = None,
|
token: Optional[str] = None,
|
||||||
username: Optional[str] = None,
|
username: Optional[str] = None,
|
||||||
password: Optional[str] = None,
|
password: Optional[str] = None,
|
||||||
|
proxies: Optional[dict] = None,
|
||||||
) -> requests.Response:
|
) -> requests.Response:
|
||||||
headers = {"accept": "application/json"}
|
headers = {"accept": "application/json"}
|
||||||
|
|
||||||
@ -60,8 +61,8 @@ def request_call(
|
|||||||
)
|
)
|
||||||
|
|
||||||
elif token is not None:
|
elif token is not None:
|
||||||
headers["Authorization"] = f"Bearer {token}"
|
headers["Authorization"] = f"{token}"
|
||||||
return requests.get(url, headers=headers)
|
return requests.get(url, proxies=proxies, headers=headers)
|
||||||
else:
|
else:
|
||||||
return requests.get(url, headers=headers)
|
return requests.get(url, headers=headers)
|
||||||
|
|
||||||
@ -72,12 +73,15 @@ def get_swag_json(
|
|||||||
username: Optional[str] = None,
|
username: Optional[str] = None,
|
||||||
password: Optional[str] = None,
|
password: Optional[str] = None,
|
||||||
swagger_file: str = "",
|
swagger_file: str = "",
|
||||||
|
proxies: Optional[dict] = None,
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
tot_url = url + swagger_file
|
tot_url = url + swagger_file
|
||||||
if token is not None:
|
if token is not None:
|
||||||
response = request_call(url=tot_url, token=token)
|
response = request_call(url=tot_url, token=token, proxies=proxies)
|
||||||
else:
|
else:
|
||||||
response = request_call(url=tot_url, username=username, password=password)
|
response = request_call(
|
||||||
|
url=tot_url, username=username, password=password, proxies=proxies
|
||||||
|
)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise Exception(f"Unable to retrieve {tot_url}, error {response.status_code}")
|
raise Exception(f"Unable to retrieve {tot_url}, error {response.status_code}")
|
||||||
@ -251,7 +255,7 @@ def compose_url_attr(raw_url: str, attr_list: list) -> str:
|
|||||||
attr_list=["2",])
|
attr_list=["2",])
|
||||||
asd2 == "http://asd.com/2"
|
asd2 == "http://asd.com/2"
|
||||||
"""
|
"""
|
||||||
splitted = re.split(r"\{[^}]+\}", raw_url)
|
splitted = re.split(r"\{[^}]+}", raw_url)
|
||||||
if splitted[-1] == "": # it can happen that the last element is empty
|
if splitted[-1] == "": # it can happen that the last element is empty
|
||||||
splitted = splitted[:-1]
|
splitted = splitted[:-1]
|
||||||
composed_url = ""
|
composed_url = ""
|
||||||
@ -265,7 +269,7 @@ def compose_url_attr(raw_url: str, attr_list: list) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def maybe_theres_simple_id(url: str) -> str:
|
def maybe_theres_simple_id(url: str) -> str:
|
||||||
dets = re.findall(r"(\{[^}]+\})", url) # searching the fields between parenthesis
|
dets = re.findall(r"(\{[^}]+})", url) # searching the fields between parenthesis
|
||||||
if len(dets) == 0:
|
if len(dets) == 0:
|
||||||
return url
|
return url
|
||||||
dets_w_id = [det for det in dets if "id" in det] # the fields containing "id"
|
dets_w_id = [det for det in dets if "id" in det] # the fields containing "id"
|
||||||
@ -349,6 +353,7 @@ def get_tok(
|
|||||||
password: str = "",
|
password: str = "",
|
||||||
tok_url: str = "",
|
tok_url: str = "",
|
||||||
method: str = "post",
|
method: str = "post",
|
||||||
|
proxies: Optional[dict] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Trying to post username/password to get auth.
|
Trying to post username/password to get auth.
|
||||||
@ -357,12 +362,15 @@ def get_tok(
|
|||||||
url4req = url + tok_url
|
url4req = url + tok_url
|
||||||
if method == "post":
|
if method == "post":
|
||||||
# this will make a POST call with username and password
|
# this will make a POST call with username and password
|
||||||
data = {"username": username, "password": password}
|
data = {"username": username, "password": password, "maxDuration": True}
|
||||||
# url2post = url + "api/authenticate/"
|
# url2post = url + "api/authenticate/"
|
||||||
response = requests.post(url4req, data=data)
|
response = requests.post(url4req, proxies=proxies, json=data)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
cont = json.loads(response.content)
|
cont = json.loads(response.content)
|
||||||
token = cont["tokens"]["access"]
|
if "token" in cont: # other authentication scheme
|
||||||
|
token = cont["token"]
|
||||||
|
else: # works only for bearer authentication scheme
|
||||||
|
token = f"Bearer {cont['tokens']['access']}"
|
||||||
elif method == "get":
|
elif method == "get":
|
||||||
# this will make a GET call with username and password
|
# this will make a GET call with username and password
|
||||||
response = requests.get(url4req)
|
response = requests.get(url4req)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user