feat(ingest/openapi): support proxies and alternate auth schemes (#9492)

Co-authored-by: Fernando Marino <f.marino@rheagroup.com>
Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
This commit is contained in:
Fernando Marino 2023-12-28 01:24:25 +01:00 committed by GitHub
parent cfb4d2f95f
commit b7a0bbcb3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 51 additions and 16 deletions

View File

@ -52,6 +52,13 @@ class OpenApiConfig(ConfigModel):
ignore_endpoints: list = Field(default=[], description="") ignore_endpoints: list = Field(default=[], description="")
username: str = Field(default="", description="") username: str = Field(default="", description="")
password: str = Field(default="", description="") password: str = Field(default="", description="")
proxies: Optional[dict] = Field(
default=None,
description="Eg. "
"`{'http': 'http://10.10.1.10:3128', 'https': 'http://10.10.1.10:1080'}`."
"If authentication is required, add it to the proxy url directly e.g. "
"`http://user:pass@10.10.1.10:3128/`.",
)
forced_examples: dict = Field(default={}, description="") forced_examples: dict = Field(default={}, description="")
token: Optional[str] = Field(default=None, description="") token: Optional[str] = Field(default=None, description="")
get_token: dict = Field(default={}, description="") get_token: dict = Field(default={}, description="")
@ -87,9 +94,13 @@ class OpenApiConfig(ConfigModel):
password=self.password, password=self.password,
tok_url=url4req, tok_url=url4req,
method=self.get_token["request_type"], method=self.get_token["request_type"],
proxies=self.proxies,
) )
sw_dict = get_swag_json( sw_dict = get_swag_json(
self.url, token=self.token, swagger_file=self.swagger_file self.url,
token=self.token,
swagger_file=self.swagger_file,
proxies=self.proxies,
) # load the swagger file ) # load the swagger file
else: # using basic auth for accessing endpoints else: # using basic auth for accessing endpoints
@ -98,6 +109,7 @@ class OpenApiConfig(ConfigModel):
username=self.username, username=self.username,
password=self.password, password=self.password,
swagger_file=self.swagger_file, swagger_file=self.swagger_file,
proxies=self.proxies,
) )
return sw_dict return sw_dict
@ -258,10 +270,15 @@ class APISource(Source, ABC):
tot_url = clean_url(config.url + self.url_basepath + endpoint_k) tot_url = clean_url(config.url + self.url_basepath + endpoint_k)
if config.token: if config.token:
response = request_call(tot_url, token=config.token) response = request_call(
tot_url, token=config.token, proxies=config.proxies
)
else: else:
response = request_call( response = request_call(
tot_url, username=config.username, password=config.password tot_url,
username=config.username,
password=config.password,
proxies=config.proxies,
) )
if response.status_code == 200: if response.status_code == 200:
fields2add, root_dataset_samples[dataset_name] = extract_fields( fields2add, root_dataset_samples[dataset_name] = extract_fields(
@ -281,10 +298,15 @@ class APISource(Source, ABC):
url_guess = try_guessing(endpoint_k, root_dataset_samples) url_guess = try_guessing(endpoint_k, root_dataset_samples)
tot_url = clean_url(config.url + self.url_basepath + url_guess) tot_url = clean_url(config.url + self.url_basepath + url_guess)
if config.token: if config.token:
response = request_call(tot_url, token=config.token) response = request_call(
tot_url, token=config.token, proxies=config.proxies
)
else: else:
response = request_call( response = request_call(
tot_url, username=config.username, password=config.password tot_url,
username=config.username,
password=config.password,
proxies=config.proxies,
) )
if response.status_code == 200: if response.status_code == 200:
fields2add, _ = extract_fields(response, dataset_name) fields2add, _ = extract_fields(response, dataset_name)
@ -304,10 +326,15 @@ class APISource(Source, ABC):
) )
tot_url = clean_url(config.url + self.url_basepath + composed_url) tot_url = clean_url(config.url + self.url_basepath + composed_url)
if config.token: if config.token:
response = request_call(tot_url, token=config.token) response = request_call(
tot_url, token=config.token, proxies=config.proxies
)
else: else:
response = request_call( response = request_call(
tot_url, username=config.username, password=config.password tot_url,
username=config.username,
password=config.password,
proxies=config.proxies,
) )
if response.status_code == 200: if response.status_code == 200:
fields2add, _ = extract_fields(response, dataset_name) fields2add, _ = extract_fields(response, dataset_name)

View File

@ -51,6 +51,7 @@ def request_call(
token: Optional[str] = None, token: Optional[str] = None,
username: Optional[str] = None, username: Optional[str] = None,
password: Optional[str] = None, password: Optional[str] = None,
proxies: Optional[dict] = None,
) -> requests.Response: ) -> requests.Response:
headers = {"accept": "application/json"} headers = {"accept": "application/json"}
@ -60,8 +61,8 @@ def request_call(
) )
elif token is not None: elif token is not None:
headers["Authorization"] = f"Bearer {token}" headers["Authorization"] = f"{token}"
return requests.get(url, headers=headers) return requests.get(url, proxies=proxies, headers=headers)
else: else:
return requests.get(url, headers=headers) return requests.get(url, headers=headers)
@ -72,12 +73,15 @@ def get_swag_json(
username: Optional[str] = None, username: Optional[str] = None,
password: Optional[str] = None, password: Optional[str] = None,
swagger_file: str = "", swagger_file: str = "",
proxies: Optional[dict] = None,
) -> Dict: ) -> Dict:
tot_url = url + swagger_file tot_url = url + swagger_file
if token is not None: if token is not None:
response = request_call(url=tot_url, token=token) response = request_call(url=tot_url, token=token, proxies=proxies)
else: else:
response = request_call(url=tot_url, username=username, password=password) response = request_call(
url=tot_url, username=username, password=password, proxies=proxies
)
if response.status_code != 200: if response.status_code != 200:
raise Exception(f"Unable to retrieve {tot_url}, error {response.status_code}") raise Exception(f"Unable to retrieve {tot_url}, error {response.status_code}")
@ -251,7 +255,7 @@ def compose_url_attr(raw_url: str, attr_list: list) -> str:
attr_list=["2",]) attr_list=["2",])
asd2 == "http://asd.com/2" asd2 == "http://asd.com/2"
""" """
splitted = re.split(r"\{[^}]+\}", raw_url) splitted = re.split(r"\{[^}]+}", raw_url)
if splitted[-1] == "": # it can happen that the last element is empty if splitted[-1] == "": # it can happen that the last element is empty
splitted = splitted[:-1] splitted = splitted[:-1]
composed_url = "" composed_url = ""
@ -265,7 +269,7 @@ def compose_url_attr(raw_url: str, attr_list: list) -> str:
def maybe_theres_simple_id(url: str) -> str: def maybe_theres_simple_id(url: str) -> str:
dets = re.findall(r"(\{[^}]+\})", url) # searching the fields between parenthesis dets = re.findall(r"(\{[^}]+})", url) # searching the fields between parenthesis
if len(dets) == 0: if len(dets) == 0:
return url return url
dets_w_id = [det for det in dets if "id" in det] # the fields containing "id" dets_w_id = [det for det in dets if "id" in det] # the fields containing "id"
@ -349,6 +353,7 @@ def get_tok(
password: str = "", password: str = "",
tok_url: str = "", tok_url: str = "",
method: str = "post", method: str = "post",
proxies: Optional[dict] = None,
) -> str: ) -> str:
""" """
Trying to post username/password to get auth. Trying to post username/password to get auth.
@ -357,12 +362,15 @@ def get_tok(
url4req = url + tok_url url4req = url + tok_url
if method == "post": if method == "post":
# this will make a POST call with username and password # this will make a POST call with username and password
data = {"username": username, "password": password} data = {"username": username, "password": password, "maxDuration": True}
# url2post = url + "api/authenticate/" # url2post = url + "api/authenticate/"
response = requests.post(url4req, data=data) response = requests.post(url4req, proxies=proxies, json=data)
if response.status_code == 200: if response.status_code == 200:
cont = json.loads(response.content) cont = json.loads(response.content)
token = cont["tokens"]["access"] if "token" in cont: # other authentication scheme
token = cont["token"]
else: # works only for bearer authentication scheme
token = f"Bearer {cont['tokens']['access']}"
elif method == "get": elif method == "get":
# this will make a GET call with username and password # this will make a GET call with username and password
response = requests.get(url4req) response = requests.get(url4req)