Inserts documents into the VectorStore and returns the corresponding Sources.
| Name | Type | Description |
|---|---|---|
documents* | List[Dict[str, Any]] | A list of dictionaries that describe the documents to insert. The document dictionaries must be in the following format: `{"document_type": "DOCUMENT_TYPE", **kwargs}`, where `"DOCUMENT_TYPE"` is one of the following: `"PDF"`, `"CSV"`, `"DOCX"`, `"URL"`, `"SentenceLevelPDF"`, `"SentenceLevelDOCX"`, `"Unstructured"`, `"InMemoryText"`. The kwargs for each document type are shown below. |
`class PDF` | `Document` | `document_type: Literal["PDF"]`; `path: str`; `metadata: Optional[dict[str, Any]] = None`; `on_disk: bool = False`; `version: str = "v1"`; `chunk_size: int = 100`; `stride: int = 40`; `emphasize_first_words: int = 0`; `ignore_header_footer: bool = True`; `ignore_nonstandard_orientation: bool = True` |
`class CSV` | `Document` | `document_type: Literal["CSV"]`; `path: str`; `id_column: Optional[str] = None`; `strong_columns: Optional[List[str]] = None`; `weak_columns: Optional[List[str]] = None`; `reference_columns: Optional[List[str]] = None`; `save_extra_info: bool = True`; `metadata: Optional[dict[str, Any]] = None`; `has_offset: bool = False`; `on_disk: bool = False` |
`class DOCX` | `Document` | `document_type: Literal["DOCX"]`; `path: str`; `metadata: Optional[dict[str, Any]] = None`; `on_disk: bool = False` |
`class URL` | `Document` | `document_type: Literal["URL"]`; `url: str`; `save_extra_info: bool = True`; `title_is_strong: bool = False`; `metadata: Optional[dict[str, Any]] = None`; `on_disk: bool = False` |
`class SentenceLevelPDF` | `Document` | `document_type: Literal["SentenceLevelPDF"]`; `path: str`; `metadata: Optional[dict[str, Any]] = None`; `on_disk: bool = False` |
`class SentenceLevelDOCX` | `Document` | `document_type: Literal["SentenceLevelDOCX"]`; `path: str`; `metadata: Optional[dict[str, Any]] = None`; `on_disk: bool = False` |
`class Unstructured` | `Document` | `document_type: Literal["Unstructured"]`; `path: str`; `save_extra_info: bool = True`; `metadata: Optional[dict[str, Any]] = None`; `on_disk: bool = False` |
`class InMemoryText` | `Document` | `document_type: Literal["InMemoryText"]`; `name: str`; `texts: list[str]`; `metadatas: Optional[list[dict[str, Any]]] = None`; `global_metadata: Optional[dict[str, Any]] = None`; `on_disk: bool = False` |