Spaces:
Runtime error
Runtime error
| """Lilac deployer streamlit UI. | |
| This powers: https://huggingface.co/spaces/lilacai/lilac_deployer | |
| """ | |
| from typing import Literal, Optional, Union | |
| import lilac as ll | |
| import streamlit as st | |
| from datasets import load_dataset_builder | |
# Streamlit re-executes this script on every interaction, so defaults are only applied when
# the corresponding session key has not been set yet.
if 'current_page' not in st.session_state:
    st.session_state.current_page = 'dataset'

# NOTE(review): newer Streamlit releases deprecate `experimental_get_query_params` in favour
# of `st.query_params` -- confirm against the pinned Streamlit version before migrating.
query_params = st.experimental_get_query_params()
# `?dataset=<id>` only seeds the initial dataset id. Re-applying it on every rerun would
# overwrite the dataset chosen in the UI (stored under the same session key), so the value
# read back from session state at deploy time would be the URL one, not the user's choice.
if 'dataset' in query_params and 'hf_dataset_name' not in st.session_state:
    st.session_state.hf_dataset_name = query_params['dataset'][0]
def _dataset_page():
    """Render step 1: pick the HuggingFace dataset that should be deployed.

    Draws the dataset id input and the advanced options (config, split, sample size, read
    token), tries to resolve the dataset with `load_dataset_builder`, and records the outcome
    in `st.session_state` under the `ds_*` keys. The 'Next' button stays disabled until the
    dataset builder has been loaded without raising.
    """
    st.header('Deploy Lilac for a HuggingFace dataset to a space', anchor=False)
    st.subheader(
        'Step 1: select a dataset',
        divider='violet',
        anchor=False,
        help='For a list of datasets see: https://huggingface.co/datasets',
    )

    dataset_id_help = (
        'Either in the format `user/dataset` or `dataset`, for example: `Open-Orca/OpenOrca`'
    )
    dataset_id = st.text_input(
        'dataset id',
        help=dataset_id_help,
        placeholder='dataset or user/dataset',
        value=st.session_state.get('hf_dataset_name', None),
    )

    with st.expander('advanced options'):
        config_name = st.text_input(
            'config',
            help='Some datasets required this field.',
            placeholder='(optional)',
            value=st.session_state.get('hf_config_name', None),
        )
        split_name = st.text_input(
            'split',
            help='Loads all splits by default.',
            placeholder='(optional)',
            value=st.session_state.get('hf_split', None),
        )
        rows_per_split = st.number_input(
            'sample size',
            help='Number of rows to sample from the dataset, for each split.',
            placeholder='(optional)',
            min_value=1,
            step=1,
            key='sample_size',
            value=st.session_state.get('sample_size', None),
        )
        read_token = st.text_input(
            'huggingface [read token](https://huggingface.co/settings/tokens)',
            type='password',
            help='The access token is used to authenticate you with HuggingFace to read the dataset. '
            'https://huggingface.co/docs/hub/security-tokens',
            placeholder='(optional if dataset is public)',
        )

    def _advance():
        """Button callback: store the current inputs and switch to the space page."""
        st.session_state.current_page = 'space'
        st.session_state.hf_dataset_name = dataset_id
        st.session_state.hf_config_name = config_name
        st.session_state.hf_split = split_name
        st.session_state.sample_size = rows_per_split

    # Try to resolve the dataset; a failure is kept in session state for the sidebar to show.
    builder = None
    loaded_ok = False
    if dataset_id:
        try:
            builder = load_dataset_builder(dataset_id, name=config_name, token=read_token)
        except Exception as load_error:
            st.session_state.ds_error = load_error
            st.session_state.ds_loaded = False
            st.session_state.hf_dataset_name = dataset_id
        else:
            loaded_ok = True

    st.button('Next', disabled=not loaded_ok, type='primary', on_click=_advance)

    if not builder:
        st.session_state.ds_loaded = False
        return

    # Publish the dataset metadata that the sidebar renders.
    st.session_state.ds_loaded = True
    st.session_state.ds_error = None
    st.session_state.ds_dataset_name = dataset_id
    st.session_state.ds_description = builder.info.description
    st.session_state.ds_features = builder.info.features
    st.session_state.ds_splits = builder.info.splits
def _space_page():
    """Render step 2: choose the target HuggingFace space and deploy to it.

    Shows the options carried over from the dataset page, collects the space id, the write
    token and the persistent-storage tier, and wires up the 'Back' and 'Deploy' buttons.
    On a successful deploy the space link is stored in session state and the app moves to
    the 'success' page; on failure the error is rendered instead.
    """
    # Snapshot of the session values; used consistently for the read-only summary below.
    session = dict(st.session_state)

    # Values entered on a previous visit to this page (saved by `_back`), used as defaults.
    hf_space_name = st.session_state.get('hf_space_name', None)
    hf_storage = st.session_state.get('hf_storage', None)
    hf_access_token = st.session_state.get('hf_access_token', None)

    def _back():
        """Button callback: remember the current inputs and return to the dataset page."""
        st.session_state.hf_space_name = hf_space_name
        st.session_state.hf_storage = hf_storage
        st.session_state.hf_access_token = hf_access_token
        st.session_state.current_page = 'dataset'

    st.button('⬅ Back', on_click=_back)

    st.subheader(
        'Step 2: create huggingface space',
        divider='violet',
        anchor=False,
        help='See HuggingFace Spaces [documentation](https://huggingface.co/docs/hub/spaces-overview)',
    )

    # Summary of the optional settings chosen on the dataset page; unset ones are skipped.
    summary_fields = (
        ('Config', 'hf_config_name'),
        ('Split', 'hf_split'),
        ('Sample size', 'sample_size'),
    )
    for label, key in summary_fields:
        chosen = session.get(key, None)
        if chosen:
            st.write(f'{label}: {chosen}')

    hf_space_name = st.text_input(
        'space id',
        help='This space will be created if it does not exist',
        placeholder='org/name',
        value=hf_space_name,
    )
    hf_access_token = st.text_input(
        'huggingface [write token](https://huggingface.co/settings/tokens)',
        type='password',
        help='The access token is used to authenticate you with HuggingFace to create the space. '
        'https://huggingface.co/docs/hub/security-tokens',
        value=hf_access_token,
    )

    # Single source of truth for the options, so the preselected index always refers to the
    # list that is actually displayed.
    storage_options = ['None', 'small', 'medium', 'large']
    # Fall back to the first option ('None') when nothing, or an unknown value, was stored.
    storage_index = storage_options.index(hf_storage) if hf_storage in storage_options else 0
    hf_storage = st.selectbox(
        'persistent storage',
        storage_options,
        help='Persistent storage is required if you want data to persist past the lifetime of the '
        'space docker image. This is recommended when running computations like signals or '
        'embeddings, or if you want labels to persist. You will get charged for persistent '
        'storage. See https://huggingface.co/docs/hub/spaces-storage',
        index=storage_index,
    )

    def _deploy():
        """Button callback: deploy the selected dataset to the space via `ll.deploy_config`."""
        hf_dataset_name = st.session_state['hf_dataset_name']
        assert hf_space_name and hf_access_token and hf_dataset_name
        hf_config_name = st.session_state.get('hf_config_name', None)
        hf_split = st.session_state.get('hf_split', None)
        sample_size = st.session_state.get('sample_size', None)

        # The selectbox uses the string 'None' for "no persistent storage".
        hf_space_storage: Optional[Union[Literal['small'], Literal['medium'], Literal['large']]]
        if hf_storage == 'None':
            hf_space_storage = None
        else:
            # Restricts the value to the literals allowed by the annotation above.
            assert hf_storage == 'small' or hf_storage == 'medium' or hf_storage == 'large'
            hf_space_storage = hf_storage

        try:
            space_link = ll.deploy_config(
                hf_space=hf_space_name,
                create_space=True,
                hf_space_storage=hf_space_storage,
                config=ll.Config(
                    datasets=[
                        ll.DatasetConfig(
                            namespace='local',
                            # Dataset ids may contain a '/', which is replaced for the name.
                            name=hf_dataset_name.replace('/', '_'),
                            source=ll.HuggingFaceSource(
                                dataset_name=hf_dataset_name,
                                config_name=hf_config_name,
                                split=hf_split,
                                sample_size=int(sample_size) if sample_size else None,
                                # NOTE(review): the write token doubles as the dataset token
                                # here; the read token from the dataset page is not stored in
                                # session state -- confirm this is intended.
                                token=hf_access_token,
                            ),
                        )
                    ]
                ),
                hf_token=hf_access_token,
            )
            st.session_state.space_link = space_link
            st.session_state.current_page = 'success'
        except Exception as e:
            st.subheader('Deployment failed!', divider='red')
            st.error(e)

    # Deploying needs both a space id and a write token.
    can_deploy = hf_access_token and hf_space_name
    st.button('Deploy', disabled=not can_deploy, on_click=_deploy)
def _success_page():
    """Render the final page, linking to the space stored under `space_link`."""
    link = st.session_state.space_link
    visit_markdown = f'[Visit your HuggingFace space ↗]({link})'
    visibility_note = (
        'Spaces are private by default. '
        f'To make them public, visit the [Space settings]({link}/settings). '
    )

    st.subheader('Success!', divider='green')
    st.subheader(visit_markdown)
    st.write(visibility_note)
# Route to the renderer of the current wizard page; unknown page names render nothing.
_PAGE_RENDERERS = {
    'dataset': _dataset_page,
    'space': _space_page,
    'success': _success_page,
}
_render_current_page = _PAGE_RENDERERS.get(st.session_state.current_page)
if _render_current_page is not None:
    _render_current_page()
# Sidebar content: dataset details once loaded, otherwise the load error or a hint.
dataset_name = (
    st.session_state.get('ds_dataset_name', None)
    or st.session_state.get('hf_dataset_name', None)
)
_sidebar = st.sidebar

if st.session_state.get('ds_loaded', False):
    _sidebar.header(
        f'[{dataset_name}](https://huggingface.co/datasets/{dataset_name})',
        divider='rainbow',
        anchor=False,
        help='Dataset information from HuggingFace datasets.',
    )
    _sidebar.write(st.session_state.get('ds_description', None))
    _sidebar.write('##### Features')
    _sidebar.table(st.session_state.get('ds_features', {}))
    _sidebar.write('##### Splits')
    _sidebar.table(st.session_state.get('ds_splits', {}))
elif st.session_state.get('ds_error', None):
    # The last attempt to load the dataset failed: show the stored exception.
    _sidebar.subheader(f'Error loading `{dataset_name}`', divider='red', anchor=False)
    _sidebar.error(st.session_state.get('ds_error', None))
    _sidebar.write(
        'If the dataset is private, make sure to enter a HuggingFace '
        'token that has access to the dataset.'
    )
else:
    _sidebar.write('Choose a dataset to see more info..')