chore: Integration test to show LabelStudio brick working with SDK (#21)

This commit is contained in:
Yuming Long 2022-10-05 14:38:44 -04:00 committed by GitHub
parent 28a4ae985d
commit 779e48bafe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 426 additions and 11 deletions

View File

@ -20,7 +20,7 @@ idna==3.4
# via requests
imagesize==1.4.1
# via sphinx
importlib-metadata==4.12.0
importlib-metadata==5.0.0
# via sphinx
jinja2==3.1.2
# via sphinx
@ -38,7 +38,7 @@ requests==2.28.1
# via sphinx
snowballstemmer==2.2.0
# via sphinx
sphinx==5.1.1
sphinx==5.2.3
# via
# -r requirements/build.in
# sphinx-rtd-theme

View File

@ -20,6 +20,8 @@ idna==3.4
# via requests
imagesize==1.4.1
# via sphinx
importlib-metadata==5.0.0
# via sphinx
jinja2==3.1.2
# via sphinx
markupsafe==2.1.1
@ -38,10 +40,10 @@ snowballstemmer==2.2.0
# via sphinx
sphinx==5.2.3
# via
# -r build.in
# -r requirements/build.in
# sphinx-rtd-theme
sphinx-rtd-theme==1.0.0
# via -r build.in
# via -r requirements/build.in
sphinxcontrib-applehelp==1.0.2
# via sphinx
sphinxcontrib-devhelp==1.0.2
@ -56,3 +58,5 @@ sphinxcontrib-serializinghtml==1.1.5
# via sphinx
urllib3==1.26.12
# via requests
zipp==3.8.1
# via importlib-metadata

View File

@ -7,3 +7,5 @@ click>=8.1
flake8
mypy
pytest-cov
label_studio_sdk
vcrpy

View File

@ -7,23 +7,37 @@
attrs==22.1.0
# via pytest
black==22.8.0
# via -r test.in
# via -r requirements/test.in
certifi==2022.9.24
# via requests
charset-normalizer==2.1.1
# via requests
click==8.1.3
# via
# -r test.in
# -r requirements/test.in
# black
coverage[toml]==6.4.4
# via
# -r test.in
# -r requirements/test.in
# pytest-cov
flake8==5.0.4
# via -r test.in
# via -r requirements/test.in
idna==3.4
# via
# requests
# yarl
iniconfig==1.1.1
# via pytest
label-studio-sdk==0.0.15
# via -r requirements/test.in
lxml==4.9.1
# via label-studio-sdk
mccabe==0.7.0
# via flake8
multidict==6.0.2
# via yarl
mypy==0.982
# via -r test.in
# via -r requirements/test.in
mypy-extensions==0.4.3
# via
# black
@ -40,6 +54,8 @@ py==1.11.0
# via pytest
pycodestyle==2.9.1
# via flake8
pydantic==1.8.2
# via label-studio-sdk
pyflakes==2.5.0
# via flake8
pyparsing==3.0.9
@ -47,7 +63,13 @@ pyparsing==3.0.9
pytest==7.1.3
# via pytest-cov
pytest-cov==4.0.0
# via -r test.in
# via -r requirements/test.in
pyyaml==6.0
# via vcrpy
requests==2.28.1
# via label-studio-sdk
six==1.16.0
# via vcrpy
tomli==2.0.1
# via
# black
@ -55,4 +77,15 @@ tomli==2.0.1
# mypy
# pytest
typing-extensions==4.3.0
# via mypy
# via
# black
# mypy
# pydantic
urllib3==1.26.12
# via requests
vcrpy==4.2.1
# via -r requirements/test.in
wrapt==1.14.1
# via vcrpy
yarl==1.8.1
# via vcrpy

View File

@ -3,12 +3,69 @@ import unstructured.staging.label_studio as label_studio
from unstructured.documents.elements import Title, NarrativeText
from label_studio_sdk.client import Client
import logging
import re
import vcr
@pytest.fixture
def elements():
    """Return one Title and one NarrativeText element used as sample input by the tests."""
    title = Title(text="Title 1")
    narrative = NarrativeText(text="Narrative 1")
    return [title, narrative]
@vcr.use_cassette("test_unstructured/vcr_fixtures/cassettes/label_studio_upload.yaml")
def test_upload_label_studio_data_with_sdk(caplog, elements):
    """Stage elements for Label Studio, import them through the SDK, and check for a 201.

    The HTTP traffic is recorded to / replayed from a VCR cassette, so a running
    Label Studio instance is only needed when the cassette is re-recorded.

    Testing Instructions
    ====================
    1. Remove the file `test_unstructured/vcr_fixtures/cassettes/label_studio_upload.yaml`;
       it will be recreated later.
    2. Install the label-studio package by running `pip install -U label-studio`.
    3. Run `label-studio`, then log in or set up a Label Studio account on the website
       that pops up.
    4. Update `LABEL_STUDIO_URL` and `API_KEY` below. You can find your API key by
       opening your account profile.
    5. Run this test once; VCR will record the HTTP requests to the yaml file.
    6. Kill the Label Studio instance and run the test again; VCR will replay the responses.
    """
    # The final assertion searches the captured log text for the import request and
    # its status code, so the urllib3 logger has to emit DEBUG records. Using
    # caplog.set_level (instead of mutating the logger directly) lets pytest restore
    # the previous level when the test ends, so other tests are not affected.
    caplog.set_level(logging.DEBUG, logger="urllib3")

    # Define the URL where Label Studio is accessible
    LABEL_STUDIO_URL = "http://localhost:8080"
    # API_KEY is a temporary key from local install not actually valid anywhere
    # Update it if the vcr cassette is updated with the API key from your user account
    API_KEY = "d44b92c31f592583bffb7e0d817a60c16a937bca"

    # Connect to the Label Studio API and check the connection
    ls = Client(url=LABEL_STUDIO_URL, api_key=API_KEY)
    ls.check_connection()
    # Start from a clean slate: this removes every project on the target instance.
    ls.delete_all_projects()

    # Create a sample project to classify types of texts
    project = ls.start_project(
        title="Text Type Classifications",
        label_config="""
<View>
<Text name="text" value="$text"/>
<View style="box-shadow: 2px 2px 5px #999;
padding: 20px; margin-top: 2em;
border-radius: 5px;">
<Header value="Choose text type"/>
<Choices name="type" toName="text"
choice="single" showInLine="true">
<Choice value="Title"/>
<Choice value="Narrative"/>
</Choices>
</View>
</View>
""",
    )

    label_studio_data = label_studio.stage_for_label_studio(elements)
    project.import_tasks(label_studio_data)

    # Check success status code (201) for posting tasks job in logger info
    success_posting_tasks_status = re.compile(r"POST /api/projects/.*/import.*201")
    assert success_posting_tasks_status.search(caplog.text) is not None
def test_convert_to_label_studio_data(elements):
label_studio_data = label_studio.stage_for_label_studio(elements)

View File

@ -0,0 +1,319 @@
interactions:
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, br
Authorization:
- Token d44b92c31f592583bffb7e0d817a60c16a937bca
Connection:
- keep-alive
User-Agent:
- python-requests/2.27.1
method: GET
uri: http://localhost:8080/health
response:
body:
string: '{"status": "UP"}'
headers:
Content-Language:
- en-us
Content-Length:
- '16'
Content-Type:
- text/html; charset=utf-8
Date:
- Wed, 05 Oct 2022 16:26:14 GMT
Referrer-Policy:
- same-origin
Server:
- WSGIServer/0.2 CPython/3.8.13
Set-Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w;
expires=Wed, 19 Oct 2022 16:26:14 GMT; HttpOnly; Max-Age=1209600; Path=/;
SameSite=Lax
Vary:
- Accept-Language, Cookie, Origin
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- DENY
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, br
Authorization:
- Token d44b92c31f592583bffb7e0d817a60c16a937bca
Connection:
- keep-alive
Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w
User-Agent:
- python-requests/2.27.1
method: GET
uri: http://localhost:8080/api/projects?page_size=10000000
response:
body:
string: '{"count":1,"next":null,"previous":null,"results":[{"id":95,"title":"Text
Type Classifications","description":"","label_config":"<View>\n <Text
name=\"text\" value=\"$text\"/>\n <View style=\"box-shadow: 2px 2px
5px #999;\n padding: 20px; margin-top: 2em;\n border-radius:
5px;\">\n <Header value=\"Choose text type\"/>\n <Choices
name=\"type\" toName=\"text\"\n choice=\"single\" showInLine=\"true\">\n <Choice
value=\"Title\"/>\n <Choice value=\"Narrative\"/>\n </Choices>\n </View>\n </View>","expert_instruction":"","show_instruction":false,"show_skip_button":true,"enable_empty_annotation":true,"show_annotation_history":false,"organization":1,"color":"#FFFFFF","maximum_annotations":1,"is_published":false,"model_version":"","is_draft":false,"created_by":{"id":1,"first_name":"","last_name":"","email":"yuming@unstructured.io","avatar":null},"created_at":"2022-10-05T16:15:26.800180Z","min_annotations_to_start_training":0,"start_training_on_annotation_update":false,"show_collab_predictions":true,"num_tasks_with_annotations":0,"task_number":2,"useful_annotation_number":0,"ground_truth_number":0,"skipped_annotations_number":0,"total_annotations_number":0,"total_predictions_number":0,"sampling":"Sequential
sampling","show_ground_truth_first":false,"show_overlap_first":false,"overlap_cohort_percentage":100,"task_data_login":null,"task_data_password":null,"control_weights":{"type":{"overall":1.0,"type":"Choices","labels":{"Title":1.0,"Narrative":1.0}}},"parsed_label_config":{"type":{"type":"Choices","to_name":["text"],"inputs":[{"type":"Text","value":"text"}],"labels":["Title","Narrative"],"labels_attrs":{"Title":{"value":"Title"},"Narrative":{"value":"Narrative"}}}},"evaluate_predictions_automatically":false,"config_has_control_tags":true,"skip_queue":"REQUEUE_FOR_OTHERS","reveal_preannotations_interactively":false,"pinned_at":null}]}'
headers:
Allow:
- GET, POST, HEAD, OPTIONS
Content-Language:
- en-us
Content-Length:
- '2002'
Content-Type:
- application/json
Date:
- Wed, 05 Oct 2022 16:26:14 GMT
Referrer-Policy:
- same-origin
Server:
- WSGIServer/0.2 CPython/3.8.13
Set-Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w;
expires=Wed, 19 Oct 2022 16:26:14 GMT; HttpOnly; Max-Age=1209600; Path=/;
SameSite=Lax
Vary:
- Accept-Language, Cookie, Origin
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- DENY
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, br
Authorization:
- Token d44b92c31f592583bffb7e0d817a60c16a937bca
Connection:
- keep-alive
Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w
User-Agent:
- python-requests/2.27.1
method: GET
uri: http://localhost:8080/api/projects/95
response:
body:
string: '{"id":95,"title":"Text Type Classifications","description":"","label_config":"<View>\n <Text
name=\"text\" value=\"$text\"/>\n <View style=\"box-shadow: 2px 2px
5px #999;\n padding: 20px; margin-top: 2em;\n border-radius:
5px;\">\n <Header value=\"Choose text type\"/>\n <Choices
name=\"type\" toName=\"text\"\n choice=\"single\" showInLine=\"true\">\n <Choice
value=\"Title\"/>\n <Choice value=\"Narrative\"/>\n </Choices>\n </View>\n </View>","expert_instruction":"","show_instruction":false,"show_skip_button":true,"enable_empty_annotation":true,"show_annotation_history":false,"organization":1,"color":"#FFFFFF","maximum_annotations":1,"is_published":false,"model_version":"","is_draft":false,"created_by":{"id":1,"first_name":"","last_name":"","email":"yuming@unstructured.io","avatar":null},"created_at":"2022-10-05T16:15:26.800180Z","min_annotations_to_start_training":0,"start_training_on_annotation_update":false,"show_collab_predictions":true,"num_tasks_with_annotations":0,"task_number":2,"useful_annotation_number":0,"ground_truth_number":0,"skipped_annotations_number":0,"total_annotations_number":0,"total_predictions_number":0,"sampling":"Sequential
sampling","show_ground_truth_first":false,"show_overlap_first":false,"overlap_cohort_percentage":100,"task_data_login":null,"task_data_password":null,"control_weights":{"type":{"overall":1.0,"type":"Choices","labels":{"Title":1.0,"Narrative":1.0}}},"parsed_label_config":{"type":{"type":"Choices","to_name":["text"],"inputs":[{"type":"Text","value":"text"}],"labels":["Title","Narrative"],"labels_attrs":{"Title":{"value":"Title"},"Narrative":{"value":"Narrative"}}}},"evaluate_predictions_automatically":false,"config_has_control_tags":true,"skip_queue":"REQUEUE_FOR_OTHERS","reveal_preannotations_interactively":false,"pinned_at":null}'
headers:
Allow:
- GET, PUT, PATCH, DELETE, HEAD, OPTIONS
Content-Language:
- en-us
Content-Length:
- '1950'
Content-Type:
- application/json
Date:
- Wed, 05 Oct 2022 16:26:14 GMT
Referrer-Policy:
- same-origin
Server:
- WSGIServer/0.2 CPython/3.8.13
Set-Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w;
expires=Wed, 19 Oct 2022 16:26:14 GMT; HttpOnly; Max-Age=1209600; Path=/;
SameSite=Lax
Vary:
- Accept-Language, Cookie, Origin
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- DENY
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, br
Authorization:
- Token d44b92c31f592583bffb7e0d817a60c16a937bca
Connection:
- keep-alive
Content-Length:
- '0'
Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w
User-Agent:
- python-requests/2.27.1
method: DELETE
uri: http://localhost:8080/api/projects/95/
response:
body:
string: ''
headers:
Allow:
- GET, PUT, PATCH, DELETE, HEAD, OPTIONS
Content-Language:
- en-us
Content-Length:
- '0'
Date:
- Wed, 05 Oct 2022 16:26:14 GMT
Referrer-Policy:
- same-origin
Server:
- WSGIServer/0.2 CPython/3.8.13
Set-Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w;
expires=Wed, 19 Oct 2022 16:26:14 GMT; HttpOnly; Max-Age=1209600; Path=/;
SameSite=Lax
Vary:
- Accept-Language, Cookie, Origin
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- DENY
status:
code: 204
message: No Content
- request:
body: '{"title": "Text Type Classifications", "label_config": "\n <View>\n <Text
name=\"text\" value=\"$text\"/>\n <View style=\"box-shadow: 2px 2px 5px
#999;\n padding: 20px; margin-top: 2em;\n border-radius:
5px;\">\n <Header value=\"Choose text type\"/>\n <Choices
name=\"type\" toName=\"text\"\n choice=\"single\" showInLine=\"true\">\n <Choice
value=\"Title\"/>\n <Choice value=\"Narrative\"/>\n </Choices>\n </View>\n </View>\n "}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, br
Authorization:
- Token d44b92c31f592583bffb7e0d817a60c16a937bca
Connection:
- keep-alive
Content-Length:
- '591'
Content-Type:
- application/json
Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w
User-Agent:
- python-requests/2.27.1
method: POST
uri: http://localhost:8080/api/projects
response:
body:
string: '{"id":96,"title":"Text Type Classifications","description":"","label_config":"<View>\n <Text
name=\"text\" value=\"$text\"/>\n <View style=\"box-shadow: 2px 2px
5px #999;\n padding: 20px; margin-top: 2em;\n border-radius:
5px;\">\n <Header value=\"Choose text type\"/>\n <Choices
name=\"type\" toName=\"text\"\n choice=\"single\" showInLine=\"true\">\n <Choice
value=\"Title\"/>\n <Choice value=\"Narrative\"/>\n </Choices>\n </View>\n </View>","expert_instruction":"","show_instruction":false,"show_skip_button":true,"enable_empty_annotation":true,"show_annotation_history":false,"organization":1,"color":"#FFFFFF","maximum_annotations":1,"is_published":false,"model_version":"","is_draft":false,"created_by":{"id":1,"first_name":"","last_name":"","email":"yuming@unstructured.io","avatar":null},"created_at":"2022-10-05T16:26:14.756037Z","min_annotations_to_start_training":0,"start_training_on_annotation_update":false,"show_collab_predictions":true,"num_tasks_with_annotations":null,"task_number":null,"useful_annotation_number":null,"ground_truth_number":null,"skipped_annotations_number":null,"total_annotations_number":null,"total_predictions_number":null,"sampling":"Sequential
sampling","show_ground_truth_first":false,"show_overlap_first":false,"overlap_cohort_percentage":100,"task_data_login":null,"task_data_password":null,"control_weights":{"type":{"overall":1.0,"type":"Choices","labels":{"Title":1.0,"Narrative":1.0}}},"parsed_label_config":{"type":{"type":"Choices","to_name":["text"],"inputs":[{"type":"Text","value":"text"}],"labels":["Title","Narrative"],"labels_attrs":{"Title":{"value":"Title"},"Narrative":{"value":"Narrative"}}}},"evaluate_predictions_automatically":false,"config_has_control_tags":true,"skip_queue":"REQUEUE_FOR_OTHERS","reveal_preannotations_interactively":false,"pinned_at":null}'
headers:
Allow:
- GET, POST, HEAD, OPTIONS
Content-Language:
- en-us
Content-Length:
- '1971'
Content-Type:
- application/json
Date:
- Wed, 05 Oct 2022 16:26:14 GMT
Referrer-Policy:
- same-origin
Server:
- WSGIServer/0.2 CPython/3.8.13
Set-Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w;
expires=Wed, 19 Oct 2022 16:26:14 GMT; HttpOnly; Max-Age=1209600; Path=/;
SameSite=Lax
Vary:
- Accept-Language, Cookie, Origin
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- DENY
status:
code: 201
message: Created
- request:
body: '[{"data": {"text": "Title 1", "ref_id": "ab03af41c2940e7584b62df48a964db3"}},
{"data": {"text": "Narrative 1", "ref_id": "ff9eb806beb1f483322f6fbda680b08b"}}]'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, br
Authorization:
- Token d44b92c31f592583bffb7e0d817a60c16a937bca
Connection:
- keep-alive
Content-Length:
- '158'
Content-Type:
- application/json
Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w
User-Agent:
- python-requests/2.27.1
method: POST
uri: http://localhost:8080/api/projects/96/import?return_task_ids=1
response:
body:
string: '{"task_count":2,"annotation_count":0,"prediction_count":0,"duration":0.012760162353515625,"file_upload_ids":[],"could_be_tasks_list":false,"found_formats":[],"data_columns":[],"task_ids":[1,2]}'
headers:
Allow:
- POST, OPTIONS
Content-Language:
- en-us
Content-Length:
- '193'
Content-Type:
- application/json
Date:
- Wed, 05 Oct 2022 16:26:14 GMT
Referrer-Policy:
- same-origin
Server:
- WSGIServer/0.2 CPython/3.8.13
Set-Cookie:
- sessionid=eyJ1aWQiOiJjYjYxOWVmYi05ZDU1LTQzNWYtOGQ4Ni00ZjcyZGJjMDM2ZTYiLCJvcmdhbml6YXRpb25fcGsiOjF9:1og7Dq:qxIxdgId2dOfw5lhYIjhXa3XGZd91f5GTyNXBnwFm_w;
expires=Wed, 19 Oct 2022 16:26:14 GMT; HttpOnly; Max-Age=1209600; Path=/;
SameSite=Lax
Vary:
- Accept-Language, Cookie, Origin
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- DENY
status:
code: 201
message: Created
version: 1