vikramvasudevan committed on
Commit
51db0fa
·
verified ·
1 Parent(s): 1f0c180

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.py +77 -1
  2. copy_chromadb.py +5 -0
config.py CHANGED
@@ -953,6 +953,79 @@ class SanatanConfig:
953
  ],
954
  "llm_hints": [],
955
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
956
  ]
957
 
958
  def get_scripture_by_collection(self, collection_name: str):
@@ -1080,7 +1153,10 @@ class SanatanConfig:
1080
  canonical_doc["text"] = canonical_doc["document"]
1081
  canonical_doc["document"] = "-"
1082
  verse = resolve_field(config.get("unit_field", config.get("unit")))
1083
- canonical_doc["verse"] = int(verse) if verse else 0
 
 
 
1084
  canonical_doc["id"] = resolve_field("id")
1085
  canonical_doc["_global_index"] = resolve_field("_global_index")
1086
  return canonical_doc
 
953
  ],
954
  "llm_hints": [],
955
  },
956
+ {
957
+ "name": "taitriya_samhitha",
958
+ "title": "Taitriya Samhitha",
959
+ "output_dir": "./output/taitriya_samhitha",
960
+ "collection_name": "taitriya_samhitha",
961
+ "collection_embedding_fn": "openai",
962
+ "unit": "panchadhi",
963
+ "unit_field": "panchadhi",
964
+ # "chapter_order": lambda: get_chapter_order_from_taitriya_samhitha(),
965
+ "field_mapping": {
966
+ "text": "sanskrit",
967
+ "unit_index": "panchadhi",
968
+ "transliteration": "transliteration",
969
+ "chapter_name": "prashnam_name",
970
+ "relative_path": lambda doc: f"{doc.get('kandam')}.{doc.get('prashnam')}.{doc.get('anuvakam')}.{doc.get('panchadhi')}",
971
+ },
972
+ "metadata_fields": [
973
+ {
974
+ "name": "kandam",
975
+ "datatype": "int",
976
+ "label": "Kandam Number",
977
+ "description": "Kandam Number",
978
+ "show_as_filter": True,
979
+ "is_unique": True,
980
+ },
981
+ {
982
+ "name": "prashnam",
983
+ "datatype": "int",
984
+ "label": "Prashnam Number",
985
+ "description": "Prashnam Number",
986
+ "show_as_filter": True,
987
+ "is_unique": True,
988
+ },
989
+ {
990
+ "name": "anuvakam",
991
+ "datatype": "int",
992
+ "label": "Anuvakam Number",
993
+ "description": "Anuvakam Number",
994
+ "show_as_filter": True,
995
+ "is_unique": True,
996
+ },
997
+ {
998
+ "name": "panchadhi",
999
+ "datatype": "int",
1000
+ "label": "Panchadhi Number",
1001
+ "description": "Panchadhi Number",
1002
+ "show_as_filter": True,
1003
+ "is_unique": True,
1004
+ },
1005
+ {
1006
+ "name": "sanskrit",
1007
+ "label": "Lyrics in sanskrit",
1008
+ "datatype": "str",
1009
+ "description": "The original sloka in sanskrit.",
1010
+ },
1011
+ {
1012
+ "name": "transliteration",
1013
+ "label": "Transliteration in english",
1014
+ "datatype": "str",
1015
+ "description": "The original sloka transliterated in English.",
1016
+ },
1017
+ ],
1018
+ "pdf_path": "./data/taitriya_samhitha.pdf",
1019
+ "source": "https://vignanam.org/english/shanti-panchakam.html",
1020
+ "language": "san+eng",
1021
+ "example_labels": [
1022
+ "Taitriya Samhitha",
1023
+ ],
1024
+ "examples": [
1025
+ "Show some verses from Taitriya Samhitha",
1026
+ ],
1027
+ "llm_hints": [],
1028
+ },
1029
  ]
1030
 
1031
  def get_scripture_by_collection(self, collection_name: str):
 
1153
  canonical_doc["text"] = canonical_doc["document"]
1154
  canonical_doc["document"] = "-"
1155
  verse = resolve_field(config.get("unit_field", config.get("unit")))
1156
+ if verse == "-":
1157
+ canonical_doc["verse"] = -1
1158
+ else:
1159
+ canonical_doc["verse"] = int(verse) if verse else 0
1160
  canonical_doc["id"] = resolve_field("id")
1161
  canonical_doc["_global_index"] = resolve_field("_global_index")
1162
  return canonical_doc
copy_chromadb.py CHANGED
@@ -38,6 +38,11 @@ db_config = {
38
  "source_collection_name": "shanthi_panchakam",
39
  "destination_collection_name": "shanthi_panchakam",
40
  },
 
 
 
 
 
41
  }
42
 
43
  parser = argparse.ArgumentParser(description="My app with database parameter")
 
38
  "source_collection_name": "shanthi_panchakam",
39
  "destination_collection_name": "shanthi_panchakam",
40
  },
41
+ "taitriya_samhitha": {
42
+ "source_db_path": "../taitriya_samhitha_ai/chromadb_store",
43
+ "source_collection_name": "taitriya_samhitha",
44
+ "destination_collection_name": "taitriya_samhitha",
45
+ },
46
  }
47
 
48
  parser = argparse.ArgumentParser(description="My app with database parameter")