Spaces: Running on CPU Upgrade
Upload folder using huggingface_hub
Browse files
- config.py +77 -1
- copy_chromadb.py +5 -0
config.py
CHANGED
|
@@ -953,6 +953,79 @@ class SanatanConfig:
|
|
| 953 |
],
|
| 954 |
"llm_hints": [],
|
| 955 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 956 |
]
|
| 957 |
|
| 958 |
def get_scripture_by_collection(self, collection_name: str):
|
|
@@ -1080,7 +1153,10 @@ class SanatanConfig:
|
|
| 1080 |
canonical_doc["text"] = canonical_doc["document"]
|
| 1081 |
canonical_doc["document"] = "-"
|
| 1082 |
verse = resolve_field(config.get("unit_field", config.get("unit")))
|
| 1083 |
-
|
|
|
|
|
|
|
|
|
|
| 1084 |
canonical_doc["id"] = resolve_field("id")
|
| 1085 |
canonical_doc["_global_index"] = resolve_field("_global_index")
|
| 1086 |
return canonical_doc
|
|
|
|
| 953 |
],
|
| 954 |
"llm_hints": [],
|
| 955 |
},
|
| 956 |
+
{
|
| 957 |
+
"name": "taitriya_samhitha",
|
| 958 |
+
"title": "Taitriya Samhitha",
|
| 959 |
+
"output_dir": "./output/taitriya_samhitha",
|
| 960 |
+
"collection_name": "taitriya_samhitha",
|
| 961 |
+
"collection_embedding_fn": "openai",
|
| 962 |
+
"unit": "panchadhi",
|
| 963 |
+
"unit_field": "panchadhi",
|
| 964 |
+
# "chapter_order": lambda: get_chapter_order_from_taitriya_samhitha(),
|
| 965 |
+
"field_mapping": {
|
| 966 |
+
"text": "sanskrit",
|
| 967 |
+
"unit_index": "panchadhi",
|
| 968 |
+
"transliteration": "transliteration",
|
| 969 |
+
"chapter_name": "prashnam_name",
|
| 970 |
+
"relative_path": lambda doc: f"{doc.get('kandam')}.{doc.get('prashnam')}.{doc.get('anuvakam')}.{doc.get('panchadhi')}",
|
| 971 |
+
},
|
| 972 |
+
"metadata_fields": [
|
| 973 |
+
{
|
| 974 |
+
"name": "kandam",
|
| 975 |
+
"datatype": "int",
|
| 976 |
+
"label": "Kandam Number",
|
| 977 |
+
"description": "Kandam Number",
|
| 978 |
+
"show_as_filter": True,
|
| 979 |
+
"is_unique": True,
|
| 980 |
+
},
|
| 981 |
+
{
|
| 982 |
+
"name": "prashnam",
|
| 983 |
+
"datatype": "int",
|
| 984 |
+
"label": "Prashnam Number",
|
| 985 |
+
"description": "Prashnam Number",
|
| 986 |
+
"show_as_filter": True,
|
| 987 |
+
"is_unique": True,
|
| 988 |
+
},
|
| 989 |
+
{
|
| 990 |
+
"name": "anuvakam",
|
| 991 |
+
"datatype": "int",
|
| 992 |
+
"label": "Anuvakam Number",
|
| 993 |
+
"description": "Anuvakam Number",
|
| 994 |
+
"show_as_filter": True,
|
| 995 |
+
"is_unique": True,
|
| 996 |
+
},
|
| 997 |
+
{
|
| 998 |
+
"name": "panchadhi",
|
| 999 |
+
"datatype": "int",
|
| 1000 |
+
"label": "Panchadhi Number",
|
| 1001 |
+
"description": "Panchadhi Number",
|
| 1002 |
+
"show_as_filter": True,
|
| 1003 |
+
"is_unique": True,
|
| 1004 |
+
},
|
| 1005 |
+
{
|
| 1006 |
+
"name": "sanskrit",
|
| 1007 |
+
"label": "Lyrics in sanskrit",
|
| 1008 |
+
"datatype": "str",
|
| 1009 |
+
"description": "The original sloka in sanskrit.",
|
| 1010 |
+
},
|
| 1011 |
+
{
|
| 1012 |
+
"name": "transliteration",
|
| 1013 |
+
"label": "Transliteration in english",
|
| 1014 |
+
"datatype": "str",
|
| 1015 |
+
"description": "The original sloka transliterated in English.",
|
| 1016 |
+
},
|
| 1017 |
+
],
|
| 1018 |
+
"pdf_path": "./data/taitriya_samhitha.pdf",
|
| 1019 |
+
"source": "https://vignanam.org/english/shanti-panchakam.html",
|
| 1020 |
+
"language": "san+eng",
|
| 1021 |
+
"example_labels": [
|
| 1022 |
+
"Taitriya Samhitha",
|
| 1023 |
+
],
|
| 1024 |
+
"examples": [
|
| 1025 |
+
"Show some verses from Taitriya Samhitha",
|
| 1026 |
+
],
|
| 1027 |
+
"llm_hints": [],
|
| 1028 |
+
},
|
| 1029 |
]
|
| 1030 |
|
| 1031 |
def get_scripture_by_collection(self, collection_name: str):
|
|
|
|
| 1153 |
canonical_doc["text"] = canonical_doc["document"]
|
| 1154 |
canonical_doc["document"] = "-"
|
| 1155 |
verse = resolve_field(config.get("unit_field", config.get("unit")))
|
| 1156 |
+
if verse == "-":
|
| 1157 |
+
canonical_doc["verse"] = -1
|
| 1158 |
+
else:
|
| 1159 |
+
canonical_doc["verse"] = int(verse) if verse else 0
|
| 1160 |
canonical_doc["id"] = resolve_field("id")
|
| 1161 |
canonical_doc["_global_index"] = resolve_field("_global_index")
|
| 1162 |
return canonical_doc
|
copy_chromadb.py
CHANGED
|
@@ -38,6 +38,11 @@ db_config = {
|
|
| 38 |
"source_collection_name": "shanthi_panchakam",
|
| 39 |
"destination_collection_name": "shanthi_panchakam",
|
| 40 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
|
| 43 |
parser = argparse.ArgumentParser(description="My app with database parameter")
|
|
|
|
| 38 |
"source_collection_name": "shanthi_panchakam",
|
| 39 |
"destination_collection_name": "shanthi_panchakam",
|
| 40 |
},
|
| 41 |
+
"taitriya_samhitha": {
|
| 42 |
+
"source_db_path": "../taitriya_samhitha_ai/chromadb_store",
|
| 43 |
+
"source_collection_name": "taitriya_samhitha",
|
| 44 |
+
"destination_collection_name": "taitriya_samhitha",
|
| 45 |
+
},
|
| 46 |
}
|
| 47 |
|
| 48 |
parser = argparse.ArgumentParser(description="My app with database parameter")
|