mirror of
https://github.com/microsoft/markitdown.git
synced 2025-11-26 06:56:53 +00:00
Move github handling from convert to convert_url
This commit is contained in:
parent
f1274dca87
commit
0b6554738c
@ -967,19 +967,6 @@ class MarkItDown:
|
|||||||
- source: can be a string representing a path or url, or a requests.response object
|
- source: can be a string representing a path or url, or a requests.response object
|
||||||
- extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
|
- extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
|
||||||
"""
|
"""
|
||||||
# Handle GitHub issue URLs directly
|
|
||||||
if isinstance(source, str):
|
|
||||||
parsed_url = urlparse(source)
|
|
||||||
if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path:
|
|
||||||
github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN"))
|
|
||||||
if not github_token:
|
|
||||||
raise ValueError(
|
|
||||||
"GitHub token is required for GitHub issue conversion."
|
|
||||||
)
|
|
||||||
return GitHubIssueConverter().convert(
|
|
||||||
issue_url=source, github_token=github_token
|
|
||||||
)
|
|
||||||
|
|
||||||
# Local path or url
|
# Local path or url
|
||||||
if isinstance(source, str):
|
if isinstance(source, str):
|
||||||
if (
|
if (
|
||||||
@ -994,6 +981,26 @@ class MarkItDown:
|
|||||||
elif isinstance(source, requests.Response):
|
elif isinstance(source, requests.Response):
|
||||||
return self.convert_response(source, **kwargs)
|
return self.convert_response(source, **kwargs)
|
||||||
|
|
||||||
|
def convert_url(
|
||||||
|
self, url: str, **kwargs: Any
|
||||||
|
) -> DocumentConverterResult: # TODO: fix kwargs type
|
||||||
|
# Handle GitHub issue URLs directly
|
||||||
|
parsed_url = urlparse(url)
|
||||||
|
if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path:
|
||||||
|
github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN"))
|
||||||
|
if not github_token:
|
||||||
|
raise ValueError(
|
||||||
|
"GitHub token is required for GitHub issue conversion."
|
||||||
|
)
|
||||||
|
return GitHubIssueConverter().convert(
|
||||||
|
issue_url=url, github_token=github_token
|
||||||
|
)
|
||||||
|
|
||||||
|
# Send a HTTP request to the URL
|
||||||
|
response = self._requests_session.get(url, stream=True)
|
||||||
|
response.raise_for_status()
|
||||||
|
return self.convert_response(response, **kwargs)
|
||||||
|
|
||||||
def convert_local(
|
def convert_local(
|
||||||
self, path: str, **kwargs: Any
|
self, path: str, **kwargs: Any
|
||||||
) -> DocumentConverterResult: # TODO: deal with kwargs
|
) -> DocumentConverterResult: # TODO: deal with kwargs
|
||||||
@ -1048,14 +1055,6 @@ class MarkItDown:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def convert_url(
|
|
||||||
self, url: str, **kwargs: Any
|
|
||||||
) -> DocumentConverterResult: # TODO: fix kwargs type
|
|
||||||
# Send a HTTP request to the URL
|
|
||||||
response = self._requests_session.get(url, stream=True)
|
|
||||||
response.raise_for_status()
|
|
||||||
return self.convert_response(response, **kwargs)
|
|
||||||
|
|
||||||
def convert_response(
|
def convert_response(
|
||||||
self, response: requests.Response, **kwargs: Any
|
self, response: requests.Response, **kwargs: Any
|
||||||
) -> DocumentConverterResult: # TODO fix kwargs type
|
) -> DocumentConverterResult: # TODO fix kwargs type
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user