diff --git a/.env.example b/.env.example index ff0249c..fdcea24 100644 --- a/.env.example +++ b/.env.example @@ -40,3 +40,4 @@ LITELLM_LOG=FAKE_LOG_LEVEL HASH_SALT="FAKE_HASH_SALT" HASH_ALGO="FAKE_HASH_ALGO" AUTH_TOKEN_EXPIRATION=9999 +DATA_COLLECTION_HOST_PREFIX="fake_prefix" diff --git a/k8s/welearn-api/values.dev.yaml b/k8s/welearn-api/values.dev.yaml index 288eaef..9a2971b 100644 --- a/k8s/welearn-api/values.dev.yaml +++ b/k8s/welearn-api/values.dev.yaml @@ -5,6 +5,7 @@ config: nonSensitive: PG_HOST: dev-lab-projects-backend.postgres.database.azure.com TIKA_URL_BASE: https://tika.k8s.lp-i.dev/ + DATA_COLLECTION_HOST_PREFIX: welearn allowedHostsRegexes: mainUrl: |- https:\/\/welearn\.k8s\.lp-i\.dev diff --git a/k8s/welearn-api/values.prod.yaml b/k8s/welearn-api/values.prod.yaml index 85cb0c1..4242c7a 100644 --- a/k8s/welearn-api/values.prod.yaml +++ b/k8s/welearn-api/values.prod.yaml @@ -5,6 +5,7 @@ config: nonSensitive: PG_HOST: prod-prod-projects-backend.postgres.database.azure.com TIKA_URL_BASE: https://tika.k8s.lp-i.org/ + DATA_COLLECTION_HOST_PREFIX: workshop allowedHostsRegexes: alphaUrls: |- https://[a-zA-Z0-9-]*\.alpha-welearn\.lp-i\.org diff --git a/poetry.lock b/poetry.lock index 7514688..3267940 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -264,7 +264,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" groups = ["main", "metrics"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -885,7 +885,7 @@ files = [ {file = "cffi-2.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:b882b3df248017dba09d6b16defe9b5c407fe32fc7c65a9c69798e6175601be9"}, {file = "cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529"}, ] -markers = {main = "platform_python_implementation != \"PyPy\"", metrics = "(sys_platform == \"win32\" or implementation_name == \"pypy\") and (platform_python_implementation == \"CPython\" or implementation_name == \"pypy\")"} +markers = {main = "platform_python_implementation != \"PyPy\"", metrics = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\" or implementation_name == \"pypy\""} [package.dependencies] pycparser = {version = "*", markers = "implementation_name != \"PyPy\""} @@ -1375,7 +1375,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev", "metrics"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, @@ -1969,8 +1969,6 @@ files = [ {file = "greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d"}, {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", 
hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5"}, {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f"}, - {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7"}, - {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8"}, {file = "greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c"}, {file = "greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2"}, {file = "greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246"}, @@ -1980,8 +1978,6 @@ files = [ {file = "greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8"}, {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52"}, {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa"}, - {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c"}, - {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5"}, {file = "greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9"}, {file = "greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd"}, {file = "greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb"}, @@ -1991,8 +1987,6 @@ files = [ {file = "greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0"}, {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0"}, {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f"}, - {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0"}, - {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d"}, {file = "greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02"}, {file = "greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31"}, {file = "greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945"}, @@ -2002,8 +1996,6 @@ files = [ {file = 
"greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671"}, {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b"}, {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae"}, - {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b"}, - {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929"}, {file = "greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b"}, {file = "greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0"}, {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f"}, @@ -2011,8 +2003,6 @@ files = [ {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1"}, {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735"}, {file = "greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337"}, - {file = "greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269"}, - {file = "greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681"}, {file = "greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01"}, {file = "greenlet-3.2.4-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:b6a7c19cf0d2742d0809a4c05975db036fdff50cd294a93632d6a310bf9ac02c"}, {file = "greenlet-3.2.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:27890167f55d2387576d1f41d9487ef171849ea0359ce1510ca6e06c8bece11d"}, @@ -2022,8 +2012,6 @@ files = [ {file = "greenlet-3.2.4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9913f1a30e4526f432991f89ae263459b1c64d1608c0d22a5c79c287b3c70df"}, {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b90654e092f928f110e0007f572007c9727b5265f7632c2fa7415b4689351594"}, {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:81701fd84f26330f0d5f4944d4e92e61afe6319dcd9775e39396e39d7c3e5f98"}, - {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:28a3c6b7cd72a96f61b0e4b2a36f681025b60ae4779cc73c1535eb5f29560b10"}, - {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:52206cd642670b0b320a1fd1cbfd95bca0e043179c1d8a045f2c6109dfe973be"}, {file = "greenlet-3.2.4-cp39-cp39-win32.whl", hash = "sha256:65458b409c1ed459ea899e939f0e1cdb14f58dbc803f2f93c5eab5694d32671b"}, {file = "greenlet-3.2.4-cp39-cp39-win_amd64.whl", hash = "sha256:d2e685ade4dafd447ede19c31277a224a239a0a1a4eca4e6390efedf20260cfb"}, {file = 
"greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d"}, @@ -3702,7 +3690,7 @@ description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.10" groups = ["main", "metrics"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, @@ -4595,10 +4583,8 @@ files = [ {file = "psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c47676e5b485393f069b4d7a811267d3168ce46f988fa602658b8bb901e9e64d"}, {file = "psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a28d8c01a7b27a1e3265b11250ba7557e5f72b5ee9e5f3a2fa8d2949c29bf5d2"}, {file = "psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5f3f2732cf504a1aa9e9609d02f79bea1067d99edf844ab92c247bbca143303b"}, - {file = "psycopg2_binary-2.9.11-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:865f9945ed1b3950d968ec4690ce68c55019d79e4497366d36e090327ce7db14"}, {file = "psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:91537a8df2bde69b1c1db01d6d944c831ca793952e4f57892600e96cee95f2cd"}, {file = "psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4dca1f356a67ecb68c81a7bc7809f1569ad9e152ce7fd02c2f2036862ca9f66b"}, - {file = "psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:0da4de5c1ac69d94ed4364b6cbe7190c1a70d325f112ba783d83f8440285f152"}, {file = "psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37d8412565a7267f7d79e29ab66876e55cb5e8e7b3bbf94f8206f6795f8f7e7e"}, {file = "psycopg2_binary-2.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:c665f01ec8ab273a61c62beeb8cce3014c214429ced8a308ca1fc410ecac3a39"}, {file = "psycopg2_binary-2.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0e8480afd62362d0a6a27dd09e4ca2def6fa50ed3a4e7c09165266106b2ffa10"}, @@ -4606,10 +4592,8 @@ files = [ {file = "psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e164359396576a3cc701ba8af4751ae68a07235d7a380c631184a611220d9a4"}, {file = "psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d57c9c387660b8893093459738b6abddbb30a7eab058b77b0d0d1c7d521ddfd7"}, {file = "psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2c226ef95eb2250974bf6fa7a842082b31f68385c4f3268370e3f3870e7859ee"}, - {file = "psycopg2_binary-2.9.11-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a311f1edc9967723d3511ea7d2708e2c3592e3405677bf53d5c7246753591fbb"}, {file = "psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb415404821b6d1c47353ebe9c8645967a5235e6d88f914147e7fd411419e6f"}, {file = "psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f07c9c4a5093258a03b28fab9b4f151aa376989e7f35f855088234e656ee6a94"}, - {file = "psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:00ce1830d971f43b667abe4a56e42c1e2d594b32da4802e44a73bacacb25535f"}, {file = 
"psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cffe9d7697ae7456649617e8bb8d7a45afb71cd13f7ab22af3e5c61f04840908"}, {file = "psycopg2_binary-2.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:304fd7b7f97eef30e91b8f7e720b3db75fee010b520e434ea35ed1ff22501d03"}, {file = "psycopg2_binary-2.9.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:be9b840ac0525a283a96b556616f5b4820e0526addb8dcf6525a0fa162730be4"}, @@ -4617,10 +4601,8 @@ files = [ {file = "psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a"}, {file = "psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e"}, {file = "psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db"}, - {file = "psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757"}, {file = "psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3"}, {file = "psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a"}, - {file = "psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34"}, {file = "psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d"}, {file = "psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d"}, {file = "psycopg2_binary-2.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b8fb3db325435d34235b044b199e56cdf9ff41223a4b9752e8576465170bb38c"}, @@ -4628,10 +4610,8 @@ files = [ {file = "psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0"}, {file = "psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766"}, {file = "psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3"}, - {file = "psycopg2_binary-2.9.11-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f"}, {file = "psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4"}, {file = "psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c"}, - {file = "psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60"}, {file = "psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1"}, {file = "psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl", hash = 
"sha256:efff12b432179443f54e230fdf60de1f6cc726b6c832db8701227d089310e8aa"}, {file = "psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:92e3b669236327083a2e33ccfa0d320dd01b9803b3e14dd986a4fc54aa00f4e1"}, @@ -4639,10 +4619,8 @@ files = [ {file = "psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b52a3f9bb540a3e4ec0f6ba6d31339727b2950c9772850d6545b7eae0b9d7c5"}, {file = "psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:db4fd476874ccfdbb630a54426964959e58da4c61c9feba73e6094d51303d7d8"}, {file = "psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47f212c1d3be608a12937cc131bd85502954398aaa1320cb4c14421a0ffccf4c"}, - {file = "psycopg2_binary-2.9.11-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e35b7abae2b0adab776add56111df1735ccc71406e56203515e228a8dc07089f"}, {file = "psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fcf21be3ce5f5659daefd2b3b3b6e4727b028221ddc94e6c1523425579664747"}, {file = "psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:9bd81e64e8de111237737b29d68039b9c813bdf520156af36d26819c9a979e5f"}, - {file = "psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:32770a4d666fbdafab017086655bcddab791d7cb260a16679cc5a7338b64343b"}, {file = "psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3cb3a676873d7506825221045bd70e0427c905b9c8ee8d6acd70cfcbd6e576d"}, {file = "psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl", hash = "sha256:4012c9c954dfaccd28f94e84ab9f94e12df76b4afb22331b1f0d3154893a6316"}, {file = "psycopg2_binary-2.9.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:20e7fb94e20b03dcc783f76c0865f9da39559dcc0c28dd1a3fce0d01902a6b9c"}, @@ -4650,10 +4628,8 @@ files = [ {file = "psycopg2_binary-2.9.11-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9d3a9edcfbe77a3ed4bc72836d466dfce4174beb79eda79ea155cc77237ed9e8"}, {file = "psycopg2_binary-2.9.11-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:44fc5c2b8fa871ce7f0023f619f1349a0aa03a0857f2c96fbc01c657dcbbdb49"}, {file = "psycopg2_binary-2.9.11-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9c55460033867b4622cda1b6872edf445809535144152e5d14941ef591980edf"}, - {file = "psycopg2_binary-2.9.11-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2d11098a83cca92deaeaed3d58cfd150d49b3b06ee0d0852be466bf87596899e"}, {file = "psycopg2_binary-2.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:691c807d94aecfbc76a14e1408847d59ff5b5906a04a23e12a89007672b9e819"}, {file = "psycopg2_binary-2.9.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:8b81627b691f29c4c30a8f322546ad039c40c328373b11dff7490a3e1b517855"}, - {file = "psycopg2_binary-2.9.11-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:b637d6d941209e8d96a072d7977238eea128046effbf37d1d8b2c0764750017d"}, {file = "psycopg2_binary-2.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:41360b01c140c2a03d346cec3280cf8a71aa07d94f3b1509fa0161c366af66b4"}, {file = "psycopg2_binary-2.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:875039274f8a2361e5207857899706da840768e2a775bf8c65e82f60b197df02"}, ] @@ -4737,7 +4713,7 @@ files = [ {file = "pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934"}, {file = 
"pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2"}, ] -markers = {main = "platform_python_implementation != \"PyPy\" and implementation_name != \"PyPy\"", metrics = "(sys_platform == \"win32\" or implementation_name == \"pypy\") and implementation_name != \"PyPy\" and (platform_python_implementation == \"CPython\" or implementation_name == \"pypy\")"} +markers = {main = "platform_python_implementation != \"PyPy\" and implementation_name != \"PyPy\"", metrics = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and implementation_name != \"PyPy\" or implementation_name == \"pypy\""} [[package]] name = "pydantic" @@ -5943,7 +5919,7 @@ description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.10" groups = ["main", "metrics"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"}, {file = "scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"}, @@ -6459,7 +6435,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["main", "dev", "metrics"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"}, {file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"}, @@ -6692,7 +6668,7 @@ files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] -markers = {dev = "python_version == \"3.10\""} +markers = {dev = "python_version < \"3.11\""} [[package]] name = "typing-inspect" @@ -6790,7 +6766,7 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "platform_python_implementation != \"PyPy\" and sys_platform != \"win32\" and sys_platform != \"cygwin\"" +markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"" files = [ {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"}, {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"}, @@ -7068,14 +7044,14 @@ files = [ [[package]] name = "welearn-database" -version = "1.2.0" +version = "1.3.0" description = "All stuff related to relationnal database from the WeLearn project" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "welearn_database-1.2.0-py3-none-any.whl", hash = "sha256:8661a27f584473ca06f7c35bcf0a6f772b64f39447dfc323b2133ad4245b0710"}, - {file = "welearn_database-1.2.0.tar.gz", hash = "sha256:6ebd945301644f1929bde9d1364b24b7bec8f37efd1522b0bde606fed52fecc2"}, + {file = "welearn_database-1.3.0-py3-none-any.whl", hash = "sha256:37e76fb0c6dd4d5b8f1529118e61f1daeaedfba267e42365627471b99b604746"}, + {file = 
"welearn_database-1.3.0.tar.gz", hash = "sha256:055c32a411f932b340b03c717da06a1dc087b3f1543912e2700d5ad6f2c913f6"}, ] [package.dependencies] @@ -7703,4 +7679,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "ac3ba5d2fa2059e24a880398880e8e4b27cbe38d0efb8efaebed58ee2ae9b880" +content-hash = "4339b0051819bb83b7764e9f27a68c74c390063dd547a94312e7134947286c44" diff --git a/pyproject.toml b/pyproject.toml index 39d2494..98b58a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ langgraph-checkpoint-postgres = "^2.0.23" azure-ai-inference = "^1.0.0b9" azure-identity = "^1.25.0" psycopg = {extras = ["binary"], version = "^3.2.10"} -welearn-database = "1.2.0" +welearn-database = "1.3.0" bs4 = "^0.0.2" urllib3 = "^2.6.3" refinedoc = "^1.0.1" diff --git a/pytest.ini b/pytest.ini index 691650a..d4cf3a6 100644 --- a/pytest.ini +++ b/pytest.ini @@ -20,6 +20,7 @@ env = RUN_ENV=development TIKA_URL_BASE=https://tika.example.com USE_CACHED_SETTINGS=True + DATA_COLLECTION_HOST_PREFIX=workshop filterwarnings = ignore:.*U.*mode is deprecated:DeprecationWarning diff --git a/src/app/api/api_v1/endpoints/chat.py b/src/app/api/api_v1/endpoints/chat.py index 69b4216..5ab20b3 100644 --- a/src/app/api/api_v1/endpoints/chat.py +++ b/src/app/api/api_v1/endpoints/chat.py @@ -1,8 +1,9 @@ from typing import Dict, Optional, cast +from uuid import UUID import backoff import psycopg -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from langchain_core.messages import ToolMessage from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver @@ -14,6 +15,7 @@ from src.app.models import chat as models from src.app.services.abst_chat import get_chat_service from src.app.services.constants import subjects as subjectsDict +from src.app.services.data_collection import get_data_collection_service from src.app.services.exceptions import ( EmptyQueryError, InvalidQuestionError, @@ -225,7 +227,7 @@ async def q_and_a_rephrase_stream( "/chat/answer", summary="Chat Answer", description="This endpoint is used to get the answer to the user's query based on the provided context and history", - response_model=str, + response_model=dict[str, str | UUID | None], ) @backoff.on_exception( wait_gen=backoff.expo, @@ -237,8 +239,11 @@ async def q_and_a_rephrase_stream( factor=2, ) async def q_and_a_ans( - body: models.ContextOut = Depends(get_params), chatfactory=Depends(get_chat_service) -) -> Optional[str]: + request: Request, + body: models.ContextOut = Depends(get_params), + chatfactory=Depends(get_chat_service), + data_collection=Depends(get_data_collection_service), +) -> dict[str, str | UUID | None] | None: """_summary_ Args: @@ -250,6 +255,8 @@ async def q_and_a_ans( str: openai chat completion content """ + session_id = request.headers.get("X-Session-ID") + try: content = await chatfactory.chat_message( query=body.query, @@ -257,7 +264,20 @@ async def q_and_a_ans( docs=body.sources, subject=subjectsDict.get(body.subject, None), ) - return cast(str, content) + + conversation_id, message_id = await data_collection.register_chat_data( + session_id=session_id, + user_query=body.query, + conversation_id=body.conversation_id, + answer_content=content, + sources=body.sources, + ) + + return { + "message_id": message_id, + "answer": content, + "conversation_id": conversation_id, + } 
except LanguageNotSupportedError as e: bad_request(message=e.message, msg_code=e.msg_code) diff --git a/src/app/api/api_v1/endpoints/metric.py b/src/app/api/api_v1/endpoints/metric.py index 8c09ac2..0f2b79f 100644 --- a/src/app/api/api_v1/endpoints/metric.py +++ b/src/app/api/api_v1/endpoints/metric.py @@ -1,9 +1,10 @@ -from fastapi import APIRouter, Response, status +from fastapi import APIRouter, Depends, Request, Response, status from pydantic import ValidationError from starlette.concurrency import run_in_threadpool from src.app.api.dependencies import get_settings -from src.app.models.metric import RowCorpusQtyDocInfo +from src.app.models.metric import DocumentClickUpdateResponse, RowCorpusQtyDocInfo +from src.app.services.data_collection import get_data_collection_service from src.app.services.sql_db.queries import get_document_qty_table_info_sync from src.app.utils.logger import logger as utils_logger @@ -38,3 +39,13 @@ async def get_nb_docs_info_per_corpus( if len(ret) == 0: response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR return ret + + +@router.post("/clicked_document") +async def update_clicked_doc_from_chat_message( + body: DocumentClickUpdateResponse, + request: Request, + data_collection=Depends(get_data_collection_service), +) -> str: + await data_collection.register_document_click(body.doc_id, body.message_id) + return "updated" diff --git a/src/app/api/api_v1/endpoints/micro_learning.py b/src/app/api/api_v1/endpoints/micro_learning.py index 1c4cde0..460fcc8 100644 --- a/src/app/api/api_v1/endpoints/micro_learning.py +++ b/src/app/api/api_v1/endpoints/micro_learning.py @@ -10,7 +10,7 @@ convert_embedding_bytes, ) from src.app.services.search import SearchService, get_search_service -from src.app.services.sql_service import ( +from src.app.services.sql_db.queries import ( get_context_documents, get_subject, get_subjects, diff --git a/src/app/api/api_v1/endpoints/search.py b/src/app/api/api_v1/endpoints/search.py index 779dbfa..154e332 100644 --- a/src/app/api/api_v1/endpoints/search.py +++ b/src/app/api/api_v1/endpoints/search.py @@ -2,7 +2,6 @@ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Response from fastapi.concurrency import run_in_threadpool -from qdrant_client.models import ScoredPoint from src.app.models.documents import Document from src.app.models.search import ( @@ -84,7 +83,7 @@ async def get_nb_docs() -> dict[str, int]: "/collections/{collection}", summary="search documents in a specific collection", description="Search documents in a specific collection", - response_model=list[ScoredPoint] | str | None, + response_model=list[Document] | str | None, ) async def search_doc_by_collection( background_tasks: BackgroundTasks, @@ -127,7 +126,7 @@ async def search_doc_by_collection( "/by_slices", summary="search all slices", description="Search slices in all collections or in collections specified", - response_model=list[ScoredPoint] | None | str, + response_model=list[Document] | None | str, ) async def search_all_slices_by_lang( background_tasks: BackgroundTasks, @@ -158,7 +157,7 @@ async def search_all_slices_by_lang( "/multiple_by_slices", summary="search all slices", description="Search slices in all collections or in collections specified", - response_model=list[ScoredPoint] | None, + response_model=list[Document] | None, ) async def multi_search_all_slices_by_lang( background_tasks: BackgroundTasks, @@ -186,7 +185,7 @@ async def multi_search_all_slices_by_lang( "/by_document", summary="search all documents", 
description="Search by documents, returns only one result by document id", - response_model=list[ScoredPoint] | None | str, + response_model=list[Document] | None | str, ) async def search_all( background_tasks: BackgroundTasks, diff --git a/src/app/core/config.py b/src/app/core/config.py index 899c8f7..e4ee159 100644 --- a/src/app/core/config.py +++ b/src/app/core/config.py @@ -20,6 +20,7 @@ class Settings(BaseSettings): """ BACKEND_CORS_ORIGINS_REGEX: str = CLIENT_ORIGINS_REGEX + DATA_COLLECTION_HOST_PREFIX: str def get_api_version(self, cls): return { diff --git a/src/app/middleware/monitor_requests.py b/src/app/middleware/monitor_requests.py index 009f09a..d4f5a0c 100644 --- a/src/app/middleware/monitor_requests.py +++ b/src/app/middleware/monitor_requests.py @@ -2,7 +2,7 @@ from fastapi.concurrency import run_in_threadpool from starlette.middleware.base import BaseHTTPMiddleware -from src.app.services.sql_service import register_endpoint +from src.app.services.sql_db.queries import register_endpoint from src.app.utils.logger import logger as logger_utils logger = logger_utils(__name__) diff --git a/src/app/models/chat.py b/src/app/models/chat.py index 3478ea9..cb8a50e 100644 --- a/src/app/models/chat.py +++ b/src/app/models/chat.py @@ -22,6 +22,7 @@ class ContextOut(BaseModel): history: list[dict] = [] query: str subject: str | None = Field(None) + conversation_id: uuid.UUID | None = Field(None) class Role(Enum): @@ -71,6 +72,11 @@ class AgentResponse(BaseModel): docs: list[ScoredPoint] | None = None +class UserQueryMetadata(BaseModel): + conversation_id: uuid.UUID + message_id: uuid.UUID + + PROMPTS = Literal["STANDALONE", "NEW_QUESTIONS", "REPHRASE"] RESPONSE_TYPE = Literal["json_object", "text"] diff --git a/src/app/models/metric.py b/src/app/models/metric.py index 2912307..3314946 100644 --- a/src/app/models/metric.py +++ b/src/app/models/metric.py @@ -1,3 +1,5 @@ +from uuid import UUID + from pydantic import BaseModel @@ -6,3 +8,8 @@ class RowCorpusQtyDocInfo(BaseModel): url: str qty_total: int qty_in_qdrant: int + + +class DocumentClickUpdateResponse(BaseModel): + message_id: UUID + doc_id: UUID diff --git a/src/app/services/data_collection.py b/src/app/services/data_collection.py new file mode 100644 index 0000000..bbabe8b --- /dev/null +++ b/src/app/services/data_collection.py @@ -0,0 +1,120 @@ +import uuid +from datetime import datetime, timedelta +from typing import Any + +from fastapi import HTTPException, Request, status +from fastapi.concurrency import run_in_threadpool + +from src.app.api.dependencies import get_settings +from src.app.models.documents import Document +from src.app.services.sql_db.queries import ( + get_current_data_collection_campaign, + update_returned_document_click, + write_chat_answer, + write_user_query, +) +from src.app.services.sql_db.queries_user import get_user_from_session_id +from src.app.utils.logger import logger as utils_logger + +logger = utils_logger(__name__) + +_cache: dict[str, Any] = {"is_campaign_active": None, "expires": None} + +# get from setting the starts with string +settings = get_settings() + + +class DataCollection: + def __init__(self, host: str): + is_campaign_active = self.get_campaign_state() + host_settings = settings.DATA_COLLECTION_HOST_PREFIX + self.should_collect = host.startswith(host_settings) and is_campaign_active + logger.info( + "data_collection: host_settings=%s, is_campaign=%s, should_collect=%s", + host_settings, + is_campaign_active, + self.should_collect, + ) + + def get_campaign_state( + self, + ): + 
"""Returns True if a campaign is active, False otherwise.""" + + now = datetime.now() + if _cache["expires"] and now < _cache["expires"]: + return _cache["is_campaign_active"] is not None + + campaign = get_current_data_collection_campaign() + + _cache["is_campaign_active"] = campaign and campaign.is_active + _cache["expires"] = now + timedelta(hours=6) + + return _cache["is_campaign_active"] + + async def register_chat_data( + self, + session_id: str | None, + user_query: str, + conversation_id: uuid.UUID | None, + answer_content: str, + sources: list[Document], + ) -> tuple[uuid.UUID | None, uuid.UUID | None]: + + if not self.should_collect: + logger.info("data_collection is not enabled.") + return None, None + + logger.info("data_collection is enabled. Registering chat data.") + + if not session_id: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail={ + "message": "Session ID not found", + "code": "SESSION_ID_NOT_FOUND", + }, + ) + + user_id = await run_in_threadpool( + get_user_from_session_id, uuid.UUID(session_id) + ) + + if not user_id: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail={ + "message": "User not found", + "code": "USER_NOT_FOUND", + }, + ) + + conversation_id = await run_in_threadpool( + write_user_query, user_id, user_query, conversation_id + ) + + message_id = await run_in_threadpool( + write_chat_answer, user_id, answer_content, sources, conversation_id + ) + + return conversation_id, message_id + + async def register_document_click( + self, + doc_id: uuid.UUID, + message_id: uuid.UUID, + ) -> None: + if not self.should_collect: + logger.info("data_collection is not enabled.") + return + + logger.info("data_collection is enabled. Registering document click.") + + await run_in_threadpool(update_returned_document_click, doc_id, message_id) + + +def get_data_collection_service(request: Request) -> DataCollection: + host = request.url.hostname + if host is None: + return DataCollection(host="") + return DataCollection(host=host) diff --git a/src/app/services/data_quality.py b/src/app/services/data_quality.py index f744707..79a779b 100644 --- a/src/app/services/data_quality.py +++ b/src/app/services/data_quality.py @@ -7,7 +7,7 @@ from sqlalchemy.exc import IntegrityError from welearn_database.data.enumeration import Step -from src.app.services.sql_service import ( +from src.app.services.sql_db.queries import ( write_new_data_quality_error, write_process_state, ) diff --git a/src/app/services/helpers.py b/src/app/services/helpers.py index 06b0caa..a4cee7c 100644 --- a/src/app/services/helpers.py +++ b/src/app/services/helpers.py @@ -12,7 +12,7 @@ from src.app.models.collections import Collection from src.app.models.documents import JourneySectionType from src.app.services.exceptions import LanguageNotSupportedError -from src.app.services.sql_service import get_embeddings_model_id_according_name +from src.app.services.sql_db.queries import get_embeddings_model_id_according_name from src.app.utils.decorators import log_time_and_error_sync from src.app.utils.logger import logger as utils_logger diff --git a/src/app/services/monitoring.py b/src/app/services/monitoring.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/app/services/search.py b/src/app/services/search.py index 2b9647d..5633838 100644 --- a/src/app/services/search.py +++ b/src/app/services/search.py @@ -24,7 +24,7 @@ from src.app.services.data_quality import DataQualityChecker from src.app.services.exceptions import CollectionNotFoundError, 
ModelNotFoundError from src.app.services.helpers import convert_embedding_bytes -from src.app.services.sql_service import get_subject +from src.app.services.sql_db.queries import get_subject from src.app.utils.decorators import log_time_and_error, log_time_and_error_sync from src.app.utils.logger import logger as logger_utils diff --git a/src/app/services/security.py b/src/app/services/security.py index dba034f..ced0b90 100644 --- a/src/app/services/security.py +++ b/src/app/services/security.py @@ -8,7 +8,7 @@ from sqlalchemy.sql import select from welearn_database.data.models import APIKeyManagement -from src.app.services.sql_service import session_maker +from src.app.services.sql_db.queries import session_maker from src.app.utils.logger import logger as logger_utils api_key_header = APIKeyHeader(name="X-API-Key") diff --git a/src/app/services/sql_db/queries.py b/src/app/services/sql_db/queries.py index 384ab22..dbf3296 100644 --- a/src/app/services/sql_db/queries.py +++ b/src/app/services/sql_db/queries.py @@ -1,21 +1,38 @@ # src/app/services/sql_db/queries.py - +import uuid from collections import Counter +from threading import Lock +from uuid import UUID -from sqlalchemy import select +from sqlalchemy import func, select +from welearn_database.data.enumeration import Step from welearn_database.data.models import ( + ChatMessage, + ContextDocument, Corpus, CorpusNameEmbeddingModelLang, + DataCollectionCampaignManagement, DocumentSlice, + EmbeddingModel, + EndpointRequest, + ErrorDataQuality, + ProcessState, QtyDocumentInQdrant, QtyDocumentInQdrantPerCorpus, QtyDocumentPerCorpus, + ReturnedDocument, Sdg, WeLearnDocument, ) -from src.app.models.documents import Document, DocumentPayloadModel -from src.app.services.sql_service import session_maker +from src.app.models.chat import Role +from src.app.models.documents import Document, DocumentPayloadModel, JourneySection +from src.app.models.search import ContextType +from src.app.services.constants import APP_NAME +from src.app.services.sql_db.sql_service import session_maker + +model_id_cache: dict[str, UUID] = {} +model_id_lock = Lock() def get_collections_sync(): @@ -121,3 +138,245 @@ def get_documents_payload_by_ids_sync(documents_ids: list[str]) -> list[Document ) ) return docs + + +def register_endpoint(endpoint, session_id, http_code): + with session_maker() as session: + endpoint_request = EndpointRequest( + endpoint_name=endpoint, session_id=session_id, http_code=http_code + ) + session.add(endpoint_request) + session.commit() + + +def get_subject(subject: str, embedding_model_id: UUID) -> ContextDocument | None: + """ + Get the subject meta document from the database. + Args: + subject: The subject to get. + + Returns: The subject meta document. + + """ + with session_maker() as session: + subject_meta_document: ContextDocument | None = ( + session.query(ContextDocument) + .filter( + ContextDocument.context_type == ContextType.SUBJECT.value.lower(), + ContextDocument.title == subject, + ContextDocument.embedding_model_id == embedding_model_id, + ) + .first() + ) + return subject_meta_document + + +def get_subjects(embedding_model_id: UUID) -> list[ContextDocument]: + """ + Get all the subject meta documents from the database. + Returns: List of subject meta documents. 
+ """ + with session_maker() as session: + sdg_meta_documents: list[ContextDocument] = ( + session.query(ContextDocument) + .filter( + ContextDocument.context_type == ContextType.SUBJECT.value.lower(), + ContextDocument.embedding_model_id == embedding_model_id, + ) + .all() + ) + return sdg_meta_documents + + +def get_context_documents( + journey_part: JourneySection, sdg: int, embedding_model_id: UUID +): + """ + Get the context documents from the database. + + Args: + journey_part: The journey part to get the context documents for. + sdg: The SDG to get the context documents for. + Returns: List of context documents. + """ + with session_maker() as session: + sdg_meta_documents: list[ContextDocument] = ( + session.query(ContextDocument) + .filter( + ContextDocument.context_type.in_(journey_part), + ContextDocument.sdg_related.contains([sdg]), + ContextDocument.embedding_model_id == embedding_model_id, + ) + .all() + ) + return sdg_meta_documents + + +def get_embeddings_model_id_according_name(model_name: str) -> UUID | None: + """ + Get the embeddings model ID according to its name. + + Args: + model_name: The name of the embeddings model. + + Returns: + The ID of the embeddings model if found, otherwise None. + """ + with model_id_lock: + if model_name in model_id_cache: + return model_id_cache[model_name] + + with session_maker() as session: + model = ( + session.query(EmbeddingModel) + .filter(EmbeddingModel.title == model_name) + .first() + ) + return model.id if model else None + + +def write_new_data_quality_error( + document_id: UUID, error_info: str, slice_id: UUID | None = None +) -> UUID: + """ + Write a new data quality error to the database. + Args: + document_id: The ID of the document with the error. + slice_id: The ID of the document slice with the error. + error_info: The error information. Usually exception message. + + Returns: + The ID of the new error entry. + """ + with session_maker() as session: + error_entry = ErrorDataQuality( + id=uuid.uuid4(), + document_id=document_id, + slice_id=slice_id, + error_raiser=APP_NAME, + error_info=error_info, + ) + session.add(error_entry) + session.commit() + return error_entry.id + + +def write_process_state(document_id: UUID, process_state: Step) -> UUID: + """ + Write the process state of a document to the database. + Args: + document_id: The ID of the document. + process_state: The current process state. + + Returns: + The ID of the new process state entry. + """ + with session_maker() as session: + process_state_entry = ProcessState( + id=uuid.uuid4(), + document_id=document_id, + title=process_state.value.lower(), + ) + session.add(process_state_entry) + session.commit() + return process_state_entry.id + + +def write_user_query( + user_id: UUID, query_content: str, conversation_id: UUID | None +) -> UUID: + """ + Write a user query to the chat in the database. + Args: + user_id: The ID of the user. + query_content: The content of the user query. + conversation_id: Key used for aggregated messages together, if None a new is generated in the API + + Returns: + The ID of the new chat message entry. 
+ """ + if not conversation_id: + conversation_id = uuid.uuid4() + chat_msg = ChatMessage( + inferred_user_id=user_id, + role=Role.USER.value, + textual_content=query_content, + conversation_id=conversation_id, + ) + with session_maker() as session: + session.add(chat_msg) + session.commit() + return conversation_id + + +def write_chat_answer( + user_id: UUID, answer: str, docs: list[Document], conversation_id: UUID +) -> UUID: + """ + Write a chat answer to the database along with the referenced documents. + Args: + user_id: The ID of the user. + answer: The content of the chat answer. + docs: The list of documents referenced in the answer. + conversation_id: Key used for aggregated messages together + + Returns: + The ID of the new chat message entry and the list of returned document IDs. + """ + chat_msg_id = uuid.uuid4() + + chat_msg = ChatMessage( + id=chat_msg_id, + inferred_user_id=user_id, + role=Role.ASSISTANT.value, + textual_content=answer, + conversation_id=conversation_id, + ) + returned_docs: list[ReturnedDocument] = [] + for doc in docs: + returned_doc = ReturnedDocument( + message_id=chat_msg_id, + document_id=doc.payload.document_id, + ) + returned_docs.append(returned_doc) + + with session_maker() as session: + session.add(chat_msg) + session.add_all(returned_docs) + session.commit() + return chat_msg.id + + +def update_returned_document_click(document_id: UUID, message_id: UUID) -> None: + """ + Write a click on a returned document to the database. + Args: + document_id: The ID of the document that was clicked. + message_id: The ID of the chat message associated with the document. + + Returns: + None + """ + with session_maker() as session: + returned_doc = ( + session.query(ReturnedDocument) + .filter( + ReturnedDocument.document_id == document_id, + ReturnedDocument.message_id == message_id, + ) + .first() + ) + if returned_doc: + returned_doc.is_clicked = True + session.commit() + + +def get_current_data_collection_campaign() -> DataCollectionCampaignManagement | None: + with session_maker() as session: + campaign = ( + session.query(DataCollectionCampaignManagement) + .filter(DataCollectionCampaignManagement.end_at > func.now()) + .order_by(DataCollectionCampaignManagement.end_at) + .first() + ) + return campaign diff --git a/src/app/services/sql_db/queries_user.py b/src/app/services/sql_db/queries_user.py index af30460..7e49bdb 100644 --- a/src/app/services/sql_db/queries_user.py +++ b/src/app/services/sql_db/queries_user.py @@ -6,7 +6,7 @@ from sqlalchemy.sql import select from welearn_database.data.models import Bookmark, InferredUser, Session -from src.app.services.sql_service import session_maker +from src.app.services.sql_db.sql_service import session_maker from src.app.utils.logger import logger as logger_utils logger = logger_utils(__name__) @@ -70,6 +70,14 @@ def get_or_create_session_sync( return new_session.id +def get_user_from_session_id(session_id: uuid.UUID) -> uuid.UUID | None: + with session_maker() as s: + session = s.execute(select(Session).where(Session.id == session_id)).first() + if session: + return session[0].inferred_user_id + return None + + def get_user_bookmarks_sync(user_id: uuid.UUID) -> list[Bookmark]: with session_maker() as s: user = s.execute( diff --git a/src/app/services/sql_db/sql_service.py b/src/app/services/sql_db/sql_service.py new file mode 100644 index 0000000..d897e8a --- /dev/null +++ b/src/app/services/sql_db/sql_service.py @@ -0,0 +1,37 @@ +# src/app/services/sql_db/sql_service.py + +from sqlalchemy import URL, 
create_engine +from sqlalchemy.orm import sessionmaker + +from src.app.api.dependencies import get_settings +from src.app.utils.decorators import singleton + +settings = get_settings() + + +@singleton +class WL_SQL: + def __init__(self): + self.engine_url = URL.create( + drivername=settings.PG_DRIVER, + username=settings.PG_USER or None, + password=settings.PG_PASSWORD or None, + host=settings.PG_HOST or None, + port=int(settings.PG_PORT) if settings.PG_PORT else None, + database=settings.PG_DATABASE, + ) + self.engine = self._create_engine() + self.session_maker = self._create_session() + + def _create_engine(self): + + return create_engine(self.engine_url) + + def _create_session(self): + + Session = sessionmaker(bind=self.engine) + return Session + + +wl_sql = WL_SQL() +session_maker = wl_sql.session_maker diff --git a/src/app/services/sql_service.py b/src/app/services/sql_service.py deleted file mode 100644 index ed15527..0000000 --- a/src/app/services/sql_service.py +++ /dev/null @@ -1,203 +0,0 @@ -# src/app/services/sql_service.py - -import uuid -from threading import Lock -from uuid import UUID - -from sqlalchemy import URL -from sqlalchemy.orm import sessionmaker -from welearn_database.data.enumeration import Step -from welearn_database.data.models import ( - ContextDocument, - EmbeddingModel, - EndpointRequest, - ErrorDataQuality, - ProcessState, -) - -from src.app.api.dependencies import get_settings -from src.app.models.documents import JourneySection -from src.app.models.search import ContextType -from src.app.services.constants import APP_NAME -from src.app.utils.decorators import singleton - -# src/app/services/sql_service.py - - -settings = get_settings() - -model_id_cache: dict[str, UUID] = {} -model_id_lock = Lock() - - -@singleton -class WL_SQL: - def __init__(self): - self.engine_url = URL.create( - drivername=settings.PG_DRIVER, - username=settings.PG_USER or None, - password=settings.PG_PASSWORD or None, - host=settings.PG_HOST or None, - port=int(settings.PG_PORT) if settings.PG_PORT else None, - database=settings.PG_DATABASE, - ) - self.engine = self._create_engine() - self.session_maker = self._create_session() - - def _create_engine(self): - from sqlalchemy import create_engine - - return create_engine(self.engine_url) - - def _create_session(self): - - Session = sessionmaker(bind=self.engine) - return Session - - def register_endpoint(self, endpoint, session_id, http_code): - with self.session_maker() as session: - endpoint_request = EndpointRequest( - endpoint_name=endpoint, session_id=session_id, http_code=http_code - ) - session.add(endpoint_request) - session.commit() - - def get_subject( - self, subject: str, embedding_model_id: UUID - ) -> ContextDocument | None: - """ - Get the subject meta document from the database. - Args: - subject: The subject to get. - - Returns: The subject meta document. - - """ - with self.session_maker() as session: - subject_meta_document: ContextDocument | None = ( - session.query(ContextDocument) - .filter( - ContextDocument.context_type == ContextType.SUBJECT.value.lower(), - ContextDocument.title == subject, - ContextDocument.embedding_model_id == embedding_model_id, - ) - .first() - ) - return subject_meta_document - - def get_subjects(self, embedding_model_id: UUID) -> list[ContextDocument]: - """ - Get all the subject meta documents from the database. - Returns: List of subject meta documents. 
- """ - with self.session_maker() as session: - sdg_meta_documents: list[ContextDocument] = ( - session.query(ContextDocument) - .filter( - ContextDocument.context_type == ContextType.SUBJECT.value.lower(), - ContextDocument.embedding_model_id == embedding_model_id, - ) - .all() - ) - return sdg_meta_documents - - def get_context_documents( - self, journey_part: JourneySection, sdg: int, embedding_model_id: UUID - ): - """ - Get the context documents from the database. - - Args: - journey_part: The journey part to get the context documents for. - sdg: The SDG to get the context documents for. - Returns: List of context documents. - """ - with self.session_maker() as session: - sdg_meta_documents: list[ContextDocument] = ( - session.query(ContextDocument) - .filter( - ContextDocument.context_type.in_(journey_part), - ContextDocument.sdg_related.contains([sdg]), - ContextDocument.embedding_model_id == embedding_model_id, - ) - .all() - ) - return sdg_meta_documents - - def get_embeddings_model_id_according_name(self, model_name: str) -> UUID | None: - """ - Get the embeddings model ID according to its name. - - Args: - model_name: The name of the embeddings model. - - Returns: - The ID of the embeddings model if found, otherwise None. - """ - with model_id_lock: - if model_name in model_id_cache: - return model_id_cache[model_name] - - with self.session_maker() as session: - model = ( - session.query(EmbeddingModel) - .filter(EmbeddingModel.title == model_name) - .first() - ) - return model.id if model else None - - def write_new_data_quality_error( - self, document_id: UUID, error_info: str, slice_id: UUID | None = None - ) -> UUID: - """ - Write a new data quality error to the database. - Args: - document_id: The ID of the document with the error. - slice_id: The ID of the document slice with the error. - error_info: The error information. Usually exception message. - - Returns: - The ID of the new error entry. - """ - with self.session_maker() as session: - error_entry = ErrorDataQuality( - id=uuid.uuid4(), - document_id=document_id, - slice_id=slice_id, - error_raiser=APP_NAME, - error_info=error_info, - ) - session.add(error_entry) - session.commit() - return error_entry.id - - def write_process_state(self, document_id: UUID, process_state: Step) -> UUID: - """ - Write the process state of a document to the database. - Args: - document_id: The ID of the document. - process_state: The current process state. - - Returns: - The ID of the new process state entry. 
- """ - with self.session_maker() as session: - process_state_entry = ProcessState( - id=uuid.uuid4(), - document_id=document_id, - title=process_state.value.lower(), - ) - session.add(process_state_entry) - session.commit() - return process_state_entry.id - - -wl_sql = WL_SQL() -session_maker = wl_sql.session_maker -register_endpoint = wl_sql.register_endpoint -get_subject = wl_sql.get_subject -get_subjects = wl_sql.get_subjects -get_context_documents = wl_sql.get_context_documents -get_embeddings_model_id_according_name = wl_sql.get_embeddings_model_id_according_name -write_new_data_quality_error = wl_sql.write_new_data_quality_error -write_process_state = wl_sql.write_process_state diff --git a/src/app/tests/api/api_v1/test_chat.py b/src/app/tests/api/api_v1/test_chat.py index 268149e..1f2490e 100644 --- a/src/app/tests/api/api_v1/test_chat.py +++ b/src/app/tests/api/api_v1/test_chat.py @@ -63,6 +63,7 @@ } +@mock.patch("src.app.services.sql_db.queries.session_maker") @mock.patch( "src.app.services.security.check_api_key_sync", new=mock.MagicMock(return_value=True), @@ -87,7 +88,7 @@ async def test_chat(self, chat_mock, *mocks): response_json = response.json() self.assertEqual(response.status_code, 200) - self.assertEqual(response_json, "ok") + self.assertEqual(response_json["answer"], "ok") async def test_chat_empty_history(self, chat_mock, *mocks): chat_mock.return_value = "ok" @@ -128,7 +129,7 @@ async def test_chat_empty_history(self, chat_mock, *mocks): ) response_json = response.json() self.assertEqual(response.status_code, 200) - self.assertEqual(response_json, "ok") + self.assertEqual(response_json["answer"], "ok") async def test_chat_not_supported_lang(self, chat_mock, *mocks): # mock raise LanguageNotSupportedError @@ -209,7 +210,9 @@ def test_new_questions_empty_query(self, *mocks): }, ) - async def test_new_questions_ok(self, mock_chat_completion, mock__detect_language): + async def test_new_questions_ok( + self, mock_db_session, mock_chat_completion, mock__detect_language + ): with mock.patch( "src.app.services.abst_chat.AbstractChat.get_new_questions", return_value={"NEW_QUESTIONS": ["Your reformulated question"]}, @@ -248,7 +251,9 @@ def test_reformulate_empty_query(self, *mocks): }, ) - async def test_reformulate_ok(self, mock_chat_completion, mock__detect_language): + async def test_reformulate_ok( + self, mock_db_session, mock_chat_completion, mock__detect_language + ): with mock.patch( "src.app.services.abst_chat.AbstractChat._detect_past_message_ref", return_value={"REF_TO_PAST": "false", "CONFIDENCE": "0.9"}, diff --git a/src/app/tests/api/api_v1/test_search.py b/src/app/tests/api/api_v1/test_search.py index 41c9231..43c10c5 100644 --- a/src/app/tests/api/api_v1/test_search.py +++ b/src/app/tests/api/api_v1/test_search.py @@ -1,3 +1,4 @@ +import uuid from types import SimpleNamespace from unittest import IsolatedAsyncioTestCase, mock from unittest.mock import patch @@ -7,6 +8,7 @@ from src.app.core.config import settings from src.app.models import collections +from src.app.models.documents import Document, DocumentPayloadModel from src.app.models.search import EnhancedSearchQuery from src.app.services.exceptions import CollectionNotFoundError, ModelNotFoundError from src.app.services.search import SearchService, sort_slices_using_mmr @@ -78,11 +80,74 @@ ), ] +mocked_documents = [ + Document( + score=0.9, + payload=DocumentPayloadModel( + document_corpus="corpus", + document_desc="desc", + document_details={}, + 
document_id=uuid.UUID("12345678-1234-5678-1234-567812345678"), + document_lang="fr", + document_sdg=[1], + document_title="title", + document_url="url", + slice_content="content", + slice_sdg=1, + ), + ), + Document( + score=0.89, + payload=DocumentPayloadModel( + document_corpus="corpus", + document_desc="desc", + document_details={}, + document_id=uuid.UUID("12345678-1234-5678-1234-567812345678"), + document_lang="fr", + document_sdg=[1], + document_title="title", + document_url="url", + slice_content="content", + slice_sdg=1, + ), + ), + Document( + score=0.88, + payload=DocumentPayloadModel( + document_corpus="corpus", + document_desc="desc", + document_details={}, + document_id=uuid.UUID("12345678-1234-5678-1234-567812345678"), + document_lang="fr", + document_sdg=[1], + document_title="title", + document_url="url", + slice_content="content", + slice_sdg=1, + ), + ), + Document( + score=0.88, + payload=DocumentPayloadModel( + document_corpus="corpus", + document_desc="desc", + document_details={}, + document_id=uuid.UUID("78901234-5678-9012-3456-789012345678"), # UUID + document_lang="fr", + document_sdg=[1], + document_title="title", + document_url="url", + slice_content="content", + slice_sdg=1, + ), + ), +] + long_query = "français with a very long sentence to test what you are saying and if the issue is the size of the string" # noqa: E501 -@patch("src.app.services.sql_service.session_maker") +@patch("src.app.services.sql_db.sql_service.session_maker") @mock.patch( "src.app.services.security.check_api_key_sync", new=mock.MagicMock(return_value=True), @@ -127,7 +192,7 @@ async def test_search_model_not_found(self, *mocks): @patch( f"{search_pipeline_path}.search_handler", - new=mock.AsyncMock(return_value=mocked_scored_points), + new=mock.AsyncMock(return_value=mocked_documents), ) async def test_search_items_success(self, *mocks): with TestClient(app) as client: @@ -171,7 +236,7 @@ async def test_search_all_slices_no_collections(self, *mocks): self.assertEqual(response.status_code, 404) -@patch("src.app.services.sql_service.session_maker") +@patch("src.app.services.sql_db.sql_service.session_maker") @patch( "src.app.services.security.check_api_key_sync", new=mock.MagicMock(return_value=True), @@ -196,7 +261,7 @@ async def test_search_all_slices_no_collections(self, *mocks): ) self.assertEqual(response.status_code, 404) - @patch(f"{search_pipeline_path}.search_handler", return_value=mocked_scored_points) + @patch(f"{search_pipeline_path}.search_handler", return_value=mocked_documents) async def test_search_all_slices_ok(self, *mocks): with TestClient(app) as client: response = client.post( @@ -239,7 +304,7 @@ async def test_search_all_slices_no_result(self, *mocks): self.assertEqual(response.status_code, 204) -@patch("src.app.services.sql_service.session_maker") +@patch("src.app.services.sql_db.sql_service.session_maker") @patch( "src.app.services.security.check_api_key_sync", new=mock.MagicMock(return_value=True), @@ -464,7 +529,7 @@ async def test_search_multi_single_query(self, *mocks): with mock.patch( "src.app.api.api_v1.endpoints.search.search_multi_inputs", ) as search_multi, mock.patch.object( - SearchService, "search_handler", return_value=mocked_scored_points + SearchService, "search_handler", return_value=mocked_documents ) as search_handler: with TestClient(app) as _client: _client.post( diff --git a/src/app/tests/api/api_v1/test_tutor.py b/src/app/tests/api/api_v1/test_tutor.py index 29e1ed3..a304740 100644 --- a/src/app/tests/api/api_v1/test_tutor.py +++ 
b/src/app/tests/api/api_v1/test_tutor.py @@ -6,38 +6,40 @@ from src.app.core.config import settings from src.main import app -client = TestClient(app) +# client = TestClient(app) -@mock.patch("src.app.services.sql_service.session_maker") +@mock.patch("src.app.services.sql_db.sql_service.session_maker") @mock.patch( "src.app.services.security.check_api_key_sync", new=mock.MagicMock(return_value=True), ) class TutorTests(IsolatedAsyncioTestCase): def test_tutor_no_files(self, *mocks): - - reponse = client.post( - f"{settings.API_V1_STR}/tutor/search", - files={}, - headers={"x-API-Key": "test"}, - ) - assert reponse.status_code == 422 + with TestClient(app) as client: + response = client.post( + f"{settings.API_V1_STR}/tutor/search", + files={}, + headers={"x-API-Key": "test"}, + ) + assert response.status_code == 422 def test_tutor_empty_file(self, *mocks): file = io.BytesIO(b"") - reponse = client.post( - f"{settings.API_V1_STR}/tutor/search", - files={"files": ("test.txt", file)}, - headers={"x-API-Key": "test"}, - ) - self.assertEqual(reponse.status_code, 400) + with TestClient(app) as client: + response = client.post( + f"{settings.API_V1_STR}/tutor/search", + files={"files": ("test.txt", file)}, + headers={"x-API-Key": "test"}, + ) + self.assertEqual(response.status_code, 400) def test_tutor_file(self, *mocks): file = io.BytesIO(b"this is a test file") - reponse = client.post( - f"{settings.API_V1_STR}/tutor/search", - files={"files": ("test.txt", file)}, - headers={"x-API-Key": "test"}, - ) - self.assertEqual(reponse.status_code, 204) + with TestClient(app) as client: + response = client.post( + f"{settings.API_V1_STR}/tutor/search", + files={"files": ("test.txt", file)}, + headers={"x-API-Key": "test"}, + ) + self.assertEqual(response.status_code, 204) diff --git a/src/app/tests/services/test_data_collection.py b/src/app/tests/services/test_data_collection.py new file mode 100644 index 0000000..93f0358 --- /dev/null +++ b/src/app/tests/services/test_data_collection.py @@ -0,0 +1,154 @@ +import unittest +import uuid +from unittest.mock import MagicMock, patch + +from fastapi import HTTPException + +from src.app.services.data_collection import DataCollection, _cache + + +async def fake_run_in_threadpool(func, *args, **kwargs): + return func(*args, **kwargs) + + +class TestDataCollectionCampaignState(unittest.TestCase): + def setUp(self): + _cache["is_campaign_active"] = None + _cache["expires"] = None + + @patch("src.app.services.data_collection.get_current_data_collection_campaign") + def test_campaign_active(self, mock_get_campaign): + mock_campaign = MagicMock() + mock_campaign.is_active = True + mock_get_campaign.return_value = mock_campaign + + dc = DataCollection(host="workshop.example.com") + + self.assertTrue(dc.should_collect) + + @patch("src.app.services.data_collection.get_current_data_collection_campaign") + def test_campaign_inactive(self, mock_get_campaign): + mock_campaign = MagicMock() + mock_campaign.is_active = False + mock_get_campaign.return_value = mock_campaign + + dc = DataCollection(host="workshop.example.com") + + self.assertFalse(dc.should_collect) + + @patch("src.app.services.data_collection.get_current_data_collection_campaign") + def test_non_workshop_host(self, mock_get_campaign): + mock_campaign = MagicMock() + mock_campaign.is_active = True + mock_get_campaign.return_value = mock_campaign + + dc = DataCollection(host="example.com") + + self.assertFalse(dc.should_collect) + + +class TestRegisterChatData(unittest.IsolatedAsyncioTestCase): + + def 
setUp(self): + _cache["is_campaign_active"] = True + _cache["expires"] = None + + @patch( + "src.app.services.data_collection.run_in_threadpool", + side_effect=fake_run_in_threadpool, + ) + @patch("src.app.services.data_collection.write_chat_answer") + @patch("src.app.services.data_collection.write_user_query") + @patch("src.app.services.data_collection.get_user_from_session_id") + @patch("src.app.services.data_collection.get_current_data_collection_campaign") + async def test_register_chat_data_success( + self, mock_campaign, mock_get_user, mock_write_query, mock_write_answer, _ + ): + mock_campaign.return_value = MagicMock(is_active=True) + + user_id = uuid.uuid4() + conversation_id = uuid.uuid4() + message_id = uuid.uuid4() + + mock_get_user.return_value = user_id + mock_write_query.return_value = conversation_id + mock_write_answer.return_value = message_id + + dc = DataCollection(host="workshop.example.com") + + result = await dc.register_chat_data( + session_id=str(uuid.uuid4()), + user_query="hello", + conversation_id=None, + answer_content="hi", + sources=[], + ) + + self.assertEqual(result, (conversation_id, message_id)) + + @patch( + "src.app.services.data_collection.run_in_threadpool", + side_effect=fake_run_in_threadpool, + ) + @patch("src.app.services.data_collection.write_chat_answer") + @patch("src.app.services.data_collection.write_user_query") + @patch("src.app.services.data_collection.get_user_from_session_id") + @patch("src.app.services.data_collection.get_current_data_collection_campaign") + async def test_register_chat_data_no_session(self, *args): + dc = DataCollection(host="workshop.example.com") + + with self.assertRaises(HTTPException) as ctx: + await dc.register_chat_data( + session_id=None, + user_query="hello", + conversation_id=None, + answer_content="hi", + sources=[], + ) + + self.assertEqual(ctx.exception.status_code, 401) + + @patch( + "src.app.services.data_collection.run_in_threadpool", + side_effect=fake_run_in_threadpool, + ) + @patch( + "src.app.services.data_collection.get_user_from_session_id", return_value=None + ) + @patch("src.app.services.data_collection.get_current_data_collection_campaign") + async def test_register_chat_data_user_not_found(self, mock_campaign, _, __): + mock_campaign.return_value = MagicMock(is_active=True) + + dc = DataCollection(host="workshop.example.com") + + with self.assertRaises(HTTPException) as ctx: + await dc.register_chat_data( + session_id=str(uuid.uuid4()), + user_query="hello", + conversation_id=None, + answer_content="hi", + sources=[], + ) + + self.assertEqual(ctx.exception.status_code, 401) + + +class TestRegisterDocumentClick(unittest.IsolatedAsyncioTestCase): + + @patch( + "src.app.services.data_collection.run_in_threadpool", + side_effect=fake_run_in_threadpool, + ) + @patch("src.app.services.data_collection.update_returned_document_click") + @patch("src.app.services.data_collection.get_current_data_collection_campaign") + async def test_register_document_click(self, mock_campaign, mock_update, _): + mock_campaign.return_value = MagicMock(is_active=True) + + dc = DataCollection(host="workshop.example.com") + + doc_id = uuid.uuid4() + message_id = uuid.uuid4() + + await dc.register_document_click(doc_id, message_id) + + mock_update.assert_called_once_with(doc_id, message_id) diff --git a/src/app/tests/services/test_data_quality.py b/src/app/tests/services/test_data_quality.py index 5b2c9d1..e8d74cc 100644 --- a/src/app/tests/services/test_data_quality.py +++ b/src/app/tests/services/test_data_quality.py @@ 
-165,7 +165,7 @@ def test_all_duplicates(self): self.assertEqual(len(result), 1) self.assertListEqual(result, [points[0]]) - @patch("src.app.services.sql_service.wl_sql.session_maker") + @patch("src.app.services.sql_db.queries.session_maker") def test__log_duplicates_points_in_db(self, mocked_session_maker): s_maker = sessionmaker(self.engine) mocked_session_maker.return_value = s_maker() diff --git a/src/app/utils/decorators.py b/src/app/utils/decorators.py index d5d8ed6..e9d482e 100644 --- a/src/app/utils/decorators.py +++ b/src/app/utils/decorators.py @@ -1,9 +1,9 @@ import functools import time -from src.app.utils.logger import logger +from src.app.utils.logger import logger as utils_logger -logger = logger(__name__) +logger = utils_logger(__name__) def log_time_and_error(func):