diff --git a/api/http_api_reference.md b/api/http_api_reference.md index 8fe0a5215..f4d89aaef 100644 --- a/api/http_api_reference.md +++ b/api/http_api_reference.md @@ -88,6 +88,7 @@ curl --request POST \ - `"picture"`: Picture - `"one"`: One - `"knowledge_graph"`: Knowledge Graph + - `"email"`: Email - `"parser_config"`: (*Body parameter*), `object` The configuration settings for the dataset parser. The attributes in this JSON object vary with the selected `"chunk_method"`: @@ -100,7 +101,7 @@ curl --request POST \ - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. - If `"chunk_method"` is `"qa"`, `"manuel"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. - - If `"chunk_method"` is `"table"` or `"one"`, `"parser_config"` is an empty JSON object. + - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object. - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes: - `"chunk_token_count"`: Defaults to `128`. - `"delimiter"`: Defaults to `"\n!?。;!?"`. @@ -517,6 +518,7 @@ curl --request PUT \ - `"picture"`: Picture - `"one"`: One - `"knowledge_graph"`: Knowledge Graph + - `"email"`: Email - `"parser_config"`: (*Body parameter*), `object` The configuration settings for the dataset parser. The attributes in this JSON object vary with the selected `"chunk_method"`: - If `"chunk_method"` is `"naive"`, the `"parser_config"` object contains the following attributes: @@ -528,7 +530,7 @@ curl --request PUT \ - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`. - If `"chunk_method"` is `"qa"`, `"manuel"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: - `"raptor"`: Raptor-specific settings. 
Defaults to: `{"use_raptor": false}`. - - If `"chunk_method"` is `"table"` or `"one"`, `"parser_config"` is an empty JSON object. + - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object. - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes: - `"chunk_token_count"`: Defaults to `128`. - `"delimiter"`: Defaults to `"\n!?。;!?"`. diff --git a/api/python_api_reference.md b/api/python_api_reference.md index be73a277f..d906b1593 100644 --- a/api/python_api_reference.md +++ b/api/python_api_reference.md @@ -75,12 +75,13 @@ The chunking method of the dataset to create. Available options: - `"picture"`: Picture - `"one"`: One - `"knowledge_graph"`: Knowledge Graph +- `"email"`: Email #### parser_config -The parser configuration of the dataset. A `ParserConfig` object's attributes vary based on the selected `"chunk_method"`: +The parser configuration of the dataset. A `ParserConfig` object's attributes vary based on the selected `chunk_method`: -- `"chunk_method"`=`"naive"`: +- `chunk_method`=`"naive"`: `{"chunk_token_num":128,"delimiter":"\\n!?;。;!?","html4excel":False,"layout_recognize":True,"raptor":{"user_raptor":False}}`. - `chunk_method`=`"qa"`: `{"raptor": {"user_raptor": False}}` @@ -94,12 +95,16 @@ The parser configuration of the dataset. 
A `ParserConfig` object's attributes va `{"raptor": {"user_raptor": False}}` - `chunk_method`=`"laws"`: `{"raptor": {"user_raptor": False}}` +- `chunk_method`=`"picture"`: + `None` - `chunk_method`=`"presentation"`: `{"raptor": {"user_raptor": False}}` - `chunk_method`=`"one"`: `None` - `chunk_method`=`"knowledge-graph"`: `{"chunk_token_num":128,"delimiter":"\\n!?;。;!?","entity_types":["organization","person","location","event","time"]}` +- `chunk_method`=`"email"`: + `None` ### Returns @@ -322,6 +327,7 @@ A dictionary representing the attributes to update, with the following keys: - `"picture"`: Picture - `"one"`: One - `"knowledge_graph"`: Knowledge Graph + - `"email"`: Email - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`: - `"chunk_method"`=`"naive"`: `{"chunk_token_num":128,"delimiter":"\\n!?;。;!?","html4excel":False,"layout_recognize":True,"raptor":{"user_raptor":False}}`. @@ -339,10 +345,14 @@ A dictionary representing the attributes to update, with the following keys: `{"raptor": {"user_raptor": False}}` - `chunk_method`=`"presentation"`: `{"raptor": {"user_raptor": False}}` + - `chunk_method`=`"picture"`: + `None` - `chunk_method`=`"one"`: `None` - `chunk_method`=`"knowledge-graph"`: `{"chunk_token_num":128,"delimiter":"\\n!?;。;!?","entity_types":["organization","person","location","event","time"]}` + - `chunk_method`=`"email"`: + `None` ### Returns @@ -475,10 +485,14 @@ A `Document` object contains the following attributes: `{"raptor": {"user_raptor": False}}` - `chunk_method`=`"presentation"`: `{"raptor": {"user_raptor": False}}` + - `chunk_method`=`"picture"`: + `None` - `chunk_method`=`"one"`: `None` - `chunk_method`=`"knowledge-graph"`: `{"chunk_token_num":128,"delimiter": "\\n!?;。;!?","entity_types":["organization","person","location","event","time"]}` + - `chunk_method`=`"email"`: + `None` ### Examples