diff --git a/docs/setupcfg.md b/docs/setupcfg.md
new file mode 100644
index 00000000..4d1b26bd
--- /dev/null
+++ b/docs/setupcfg.md
@@ -0,0 +1,52 @@
+The following metadata fields can be extracted from a setup.cfg file.
+These fields are defined in the [setuptools declarative configuration specification](https://setuptools.pypa.io/en/latest/userguide/declarative_config.html), and are mapped according to the [CodeMeta crosswalk for Python Distutils](https://github.com/codemeta/codemeta/blob/master/crosswalks/Python%20Distutils%20(PyPI).csv).
+
+| Software metadata category | SOMEF metadata JSON path | SETUP.CFG metadata file field |
+|--------------------------------|-----------------------------|----------------------------------------|
+| author - value | author[i].result.value | metadata.author |
+| author - email | author[i].result.email | metadata.author_email |
+| author - name | author[i].result.name | metadata.author |
+| code_repository | code_repository[i].result.value | project_urls (source, repository, code) |
+| description | description[i].result.value | metadata.description |
+| documentation | documentation[i].result.value | project_urls (Documentation, docs, doc) |
+| license - value | license[i].result.value | metadata.license |
+| license - name | license[i].result.name | metadata.license *(1)* |
+| license - spdx id | license[i].result.spdx_id | metadata.license if "spdx.org/licenses/" *(1)* |
+| has_package_file | has_package_file[i].result.value | URL of the setup.cfg file |
+| homepage | homepage[i].result.value | metadata.url or project_urls (Homepage) |
+| keywords | keywords[i].result.value | metadata.keywords |
+| package_id | package_id[i].result.value | metadata.name |
+| requirements - value | requirements[i].result.value | options.install_requires or options.setup_requires *(2)* |
+| requirements - name | requirements[i].result.name | options.install_requires or options.setup_requires -> name *(2)* |
+| requirements - version | requirements[i].result.version | options.install_requires or options.setup_requires -> version *(2)* |
+| runtime_platform - value | runtime_platform[i].result.value | options.python_requires -> "Python" + version *(3)* |
+| runtime_platform - name | runtime_platform[i].result.name | options.python_requires -> "Python" *(3)* |
+| runtime_platform - version | runtime_platform[i].result.version | options.python_requires *(3)* |
+| version - value | version[i].result.value | metadata.version |
+| version - tag | version[i].result.tag | metadata.version |
+
+---
+
+*(1)*
+- The license name and spdx_id are looked up in a local dictionary of known licenses.
+
+*(2)*
+- Examples of requirements:
+```
+[options]
+install_requires =
+ astropy
+ ctapipe >= 0.12
+ h5py ~= 3.1.0
+
+setup_requires =
+ setuptools >= 40.6.0
+ wheel
+
+```
+
+*(3)*
+- Example:
+```
+python_requires = >= 3.10.0
+```
diff --git a/docs/supported_languages.md b/docs/supported_languages.md
index 9804da9e..96e8110d 100644
--- a/docs/supported_languages.md
+++ b/docs/supported_languages.md
@@ -12,7 +12,7 @@ To know more about the extraction details for each type of file, click on it.
| JavaScript | [`package.json`](./packagejson.md), [`bower.json`](./bower.md) |
| Julia | [`Project.toml`](./julia.md) |
| PHP | [`composer.json`](./composer.md) |
-| Python | [`setup.py`](./setuppy.md), [`pyproject.toml`](./pyprojecttoml.md), [`requirements.txt`](./requirementstxt.md) |
+| Python | [`setup.py`](./setuppy.md), [`setup.cfg`](./setupcfg.md), [`pyproject.toml`](./pyprojecttoml.md), [`requirements.txt`](./requirementstxt.md) |
| R | [`DESCRIPTION`](./description.md) |
| Ruby | [`*.gemspec`](./gemspec.md) |
| Rust | [`Cargo.toml`](./cargo.md) |
diff --git a/docs/supported_metadata_files.md b/docs/supported_metadata_files.md
index 7bdb9566..403e5803 100644
--- a/docs/supported_metadata_files.md
+++ b/docs/supported_metadata_files.md
@@ -21,6 +21,7 @@ SOMEF can extract metadata from a wide range of files commonly found in software
| `pyproject.toml` | Python | Modern Python project configuration file used by tools like Poetry and Flit | [๐](./pyprojecttoml.md)| [๐](https://packaging.python.org/en/latest/guides/writing-pyproject-toml/)| [PEP 621](https://peps.python.org/pep-0621/)| [Example](https://github.com/KnowledgeCaptureAndDiscovery/somef/blob/master/pyproject.toml) |
| `requirements.txt` | Python | Lists Python package dependencies | [๐](./requirementstxt.md)| [๐](https://pip.pypa.io/en/stable/reference/requirements-file-format/)| [Latest](https://pip.pypa.io/en/stable/reference/requirements-file-format/)| [Example](https://github.com/oeg-upm/FAIR-Research-Object/blob/main/requirements.txt) |
| `setup.py` | Python | Package file format used in python projects | [๐](./setuppy.md)| [๐](https://setuptools.pypa.io/en/latest/references/keywords.html)| [v75.0.0](https://github.com/pypa/setuptools)| [Example](https://github.com/oeg-upm/soca/blob/main/setup.py) |
+| `setup.cfg` | Python | Configuration file for setuptools used to define package metadata and options in a declarative way | [๐](./setupcfg.md)| [๐](https://setuptools.pypa.io/en/latest/userguide/declarative_config.html) | [v75.0.0](https://github.com/pypa/setuptools)|[Example](https://github.com/oeg-upm/soca/blob/main/setup.cfg)|
| `DESCRIPTION` | R | Metadata file for R packages including title, author, and version | [๐](./description.md) | [๐](https://cran.r-project.org/doc/manuals/R-exts.html#The-DESCRIPTION-file)| [v4.4.1](https://cran.r-project.org/doc/manuals/r-release/R-exts.html) | [Example](https://github.com/cran/ggplot2/blob/master/DESCRIPTION) |
| `*.gemspec` | Ruby | Manifest file serves as the package descriptor used in Ruby gem projects. | [๐](./gemspec.md)| [๐](https://guides.rubygems.org/specification-reference/)| [v3.5.22](https://github.com/rubygems/rubygems)|[Example](https://github.com/rubygems/rubygems/blob/master/bundler/bundler.gemspec) |
| `cargo.toml` | Rust | Manifest file serves as the package descriptor used in Rust projects | [๐](./cargo.md) | [๐](https://doc.rust-lang.org/cargo/reference/manifest.html)| [v0.85.0](https://github.com/rust-lang/cargo) | [Example](https://github.com/rust-lang/cargo/blob/master/Cargo.toml) |
diff --git a/src/somef/parser/setupcfg_parser.py b/src/somef/parser/setupcfg_parser.py
new file mode 100644
index 00000000..a045ab17
--- /dev/null
+++ b/src/somef/parser/setupcfg_parser.py
@@ -0,0 +1,182 @@
+import re
+import os
+import logging
+import configparser
+from pathlib import Path
+from ..process_results import Result
+from ..utils import constants
+from ..regular_expressions import detect_license_spdx, detect_spdx_from_declared
+
+# setuptools directives: the real value lives in another attribute or file,
+# so the literal text must not be reported as metadata.
+_DIRECTIVE_PREFIXES = ("attr:", "file:")
+
+
+def parse_setup_cfg(file_path, metadata_result: Result, source):
+    """
+    Parse a setup.cfg file and add the metadata found to ``metadata_result``.
+
+    The ``[metadata]`` and ``[options]`` sections are read with configparser.
+    Every result is added with confidence 1, the config-parser technique and
+    ``source`` as its origin.
+
+    :param file_path: path of the setup.cfg file to read.
+    :param metadata_result: Result object that receives the extracted metadata.
+    :param source: URL or path of the file, recorded as the source of each result.
+    :return: the same ``metadata_result``; parsing errors are logged, not raised.
+    """
+
+    def add(category, result):
+        # Every result of this parser shares confidence, technique and source.
+        metadata_result.add_result(
+            category, result, 1, constants.TECHNIQUE_CODE_CONFIG_PARSER, source
+        )
+
+    try:
+        add(constants.CAT_HAS_PACKAGE_FILE, {"value": source, "type": constants.URL})
+
+        config = configparser.ConfigParser()
+        config.read(file_path, encoding="utf-8")
+
+        metadata = dict(config["metadata"]) if "metadata" in config else {}
+        options = dict(config["options"]) if "options" in config else {}
+
+        if "name" in metadata:
+            add(constants.CAT_PACKAGE_ID,
+                {"value": metadata["name"], "type": constants.STRING})
+
+        if "version" in metadata:
+            version_value = metadata["version"]
+            # "attr:" and "file:" values are resolved by setuptools at build
+            # time, so the literal text is not a version number.
+            if not version_value.startswith(_DIRECTIVE_PREFIXES):
+                add(constants.CAT_VERSION,
+                    {"value": version_value, "type": constants.RELEASE, "tag": version_value})
+
+        if "description" in metadata:
+            add(constants.CAT_DESCRIPTION,
+                {"value": metadata["description"], "type": constants.STRING})
+
+        if "author" in metadata or "author_email" in metadata:
+            add(constants.CAT_AUTHORS, {
+                "name": metadata.get("author"),
+                "email": metadata.get("author_email"),
+                "type": constants.AGENT,
+                "value": metadata.get("author")
+            })
+
+        if "license" in metadata:
+            license_value = metadata["license"]
+            # detect_license_spdx is only consulted when
+            # detect_spdx_from_declared returns nothing.
+            license_info_spdx = (detect_spdx_from_declared(license_value)
+                                 or detect_license_spdx(license_value, 'JSON'))
+            if license_info_spdx:
+                license_data = {
+                    "value": license_value,
+                    "spdx_id": license_info_spdx.get('spdx_id'),
+                    "name": license_info_spdx.get('name'),
+                    "type": constants.LICENSE
+                }
+            else:
+                license_data = {"value": license_value, "type": constants.LICENSE}
+            add(constants.CAT_LICENSE, license_data)
+
+        if "keywords" in metadata:
+            # Keywords may be comma-separated, one per line, or both.
+            for kw in re.split(r'[,\n]', metadata["keywords"]):
+                kw = kw.strip()
+                if kw:
+                    add(constants.CAT_KEYWORDS, {"value": kw, "type": constants.STRING})
+
+        if "url" in metadata:
+            add(constants.CAT_HOMEPAGE, {"value": metadata["url"], "type": constants.URL})
+
+        _add_requirements(
+            add, options.get("install_requires"), constants.DEPENDENCY_TYPE_RUNTIME
+        )
+        _add_requirements(
+            add, options.get("setup_requires"), constants.DEPENDENCY_TYPE_DEVELOPMENT
+        )
+
+        if "python_requires" in options:
+            add(constants.CAT_RUNTIME_PLATFORM, {
+                "value": f"Python{options['python_requires']}",
+                "name": "Python",
+                "version": options["python_requires"],
+                "type": constants.STRING
+            })
+
+        if "project_urls" in metadata:
+            # Each line of the option has the form "Label = URL".
+            for line in metadata["project_urls"].split('\n'):
+                if '=' not in line:
+                    continue
+                label, url_val = [part.strip() for part in line.split('=', 1)]
+                label_lower = label.lower()
+
+                if label_lower in ["documentation", "docs", "doc"]:
+                    add(constants.CAT_DOCUMENTATION, {"value": url_val, "type": constants.URL})
+                elif label_lower in ["repository", "source", "code"]:
+                    add(constants.CAT_CODE_REPOSITORY, {"value": url_val, "type": constants.URL})
+                elif label_lower == "homepage" and url_val != metadata.get("url"):
+                    # Skipped when identical to metadata.url, which was added above.
+                    add(constants.CAT_HOMEPAGE, {"value": url_val, "type": constants.URL})
+
+    except Exception as e:
+        logging.error(f"Error parsing setup.cfg file {file_path}: {str(e)}")
+
+    return metadata_result
+
+
+def _add_requirements(add, raw_requirements, dependency_type):
+    """
+    Add one requirement result per non-empty line of a ``*_requires`` option.
+
+    :param add: callable ``add(category, result)`` that stores a result.
+    :param raw_requirements: raw option text, or None when the option is absent.
+    :param dependency_type: dependency type constant recorded in each result.
+    """
+    if not raw_requirements:
+        return
+    for req in raw_requirements.strip().splitlines():
+        req = req.strip()
+        # "file: requirements.txt" refers to another file, not to a package.
+        if not req or req.startswith("file:"):
+            continue
+        name, version = parse_dependency(req)
+        if name:
+            add(constants.CAT_REQUIREMENTS, {
+                "value": req,
+                "name": name,
+                "version": version,
+                "type": constants.SOFTWARE_DEPENDENCY,
+                "dependency_type": dependency_type,
+                "dependency_resolver": "python"
+            })
+
+
+def parse_dependency(dependency_str):
+    """
+    Parse a dependency string to extract name and version.
+
+    :param dependency_str: requirement such as ``"ctapipe >= 0.12"``.
+    :return: ``(name, version)``; the version keeps its operator (``">= 0.12"``)
+        and is ``""`` when no constraint is given. ``(None, None)`` for empty input.
+    """
+    if not dependency_str:
+        return None, None
+
+    # Drop the environment marker (after ';'): its comparison operators
+    # would otherwise be mistaken for the version constraint.
+    requirement = dependency_str.split(';', 1)[0].strip()
+
+    # The capturing group keeps the operator in the split result.
+    parts = re.split(r'(>=|<=|==|!=|>|<|~=)', requirement, maxsplit=1)
+    name = parts[0].strip()
+    version = ''.join(parts[1:]) if len(parts) > 1 else ""
+
+    # NOTE(review): brackets are removed from the version as before; the reason is not visible here.
+    version = re.sub(r'[\[\]]', '', version)
+
+    return name, version
\ No newline at end of file
diff --git a/src/somef/parser/toml_parser.py b/src/somef/parser/toml_parser.py
index 64ad254d..b7ebfd46 100644
--- a/src/somef/parser/toml_parser.py
+++ b/src/somef/parser/toml_parser.py
@@ -118,7 +118,8 @@ def extract_common_version_field(data, metadata_result, source, file_type):
For Project.toml: data["version"]
"""
version_value = None
-
+ version_type = None
+
if file_type == "cargo" and "package" in data and "version" in data["package"]:
version_value = data["package"]["version"]
version_type = constants.RELEASE
diff --git a/src/somef/process_files.py b/src/somef/process_files.py
index 709b6392..2b80ad0f 100644
--- a/src/somef/process_files.py
+++ b/src/somef/process_files.py
@@ -25,6 +25,7 @@
from .parser.publiccode_parser import parse_publiccode_file
from .parser.codeowners_parser import parse_codeowners_file
from .parser.conda_environment_parser import parse_conda_environment_file
+from .parser.setupcfg_parser import parse_setup_cfg
from chardet import detect
@@ -277,7 +278,8 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
(filename.lower() == "environment.yml" or filename.lower() == "environment.yaml") or \
(filename.lower() == ".zenodo.json") or \
(filename.lower() == "cargo.toml" and repo_relative_path == ".") or (filename.lower() == "composer.json" and repo_relative_path == ".") or \
- (filename == "Project.toml" or (filename.lower()== "publiccode.yml" or filename.lower()== "publiccode.yaml") and repo_relative_path == "."):
+ (filename == "Project.toml" or (filename.lower()== "publiccode.yml" or filename.lower()== "publiccode.yaml") and repo_relative_path == ".") or \
+ filename.lower() == "setup.cfg":
if filename.lower() in parsed_build_files and repo_relative_path != ".":
logging.info(f"Ignoring secondary {filename} in {dir_path}")
continue
@@ -318,6 +320,8 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
metadata_result = parse_publiccode_file(os.path.join(dir_path, filename), metadata_result, build_file_url)
if filename.lower() == "environment.yml" or filename.lower() == "environment.yaml":
metadata_result = parse_conda_environment_file(os.path.join(dir_path, filename), metadata_result, build_file_url)
+ if filename.lower() == "setup.cfg":
+ metadata_result = parse_setup_cfg(os.path.join(dir_path, filename), metadata_result, build_file_url)
# if filename.lower() == ".zenodo":
# metadata_result = parse_zenodo_file(os.path.join(dir_path, filename), metadata_result, build_file_url)
parsed_build_files.add(filename.lower())
diff --git a/src/somef/test/test_data/repositories/soca/README.md b/src/somef/test/test_data/repositories/soca/README.md
new file mode 100644
index 00000000..d9716b91
--- /dev/null
+++ b/src/somef/test/test_data/repositories/soca/README.md
@@ -0,0 +1,301 @@
+
+# Software Catalog Creator (soca)
+
+[](https://doi.org/10.5281/zenodo.10316197) [](https://badge.fury.io/py/soca) [](https://www.repostatus.org/#active)
+
+
+
+A python package that given an organization/user name, it will create a software catalog for browsing all repositories or just a single repository in a minimalist card.
+
+## Sample result
+
+Click [here](https://oeg-upm.github.io/soca/example/oeg-upm/index.html) to see an interactive example generated by using the `oeg-upm` organization as input for SOCA.
+
+Click [here](https://oeg-upm.github.io/soca/example/KnowledgeCaptureAndDiscoveryANDmintproject/index.html) to see an interactive example generated by using the `KnowledgeCaptureAndDiscovery` and `mintproject` organization as input for SOCA.
+
+Click [here](https://oeg-upm.github.io/soca/example/LinkedEarth/index.html) to see an interactive example generated by using the `LinkedEarth` organization as input for SOCA.
+
+Command used:
+
+```bash
+soca fetch -i oeg-upm --org -o oeg-upm_repos -na
+soca extract -i oeg-upm_repos -o oeg-upm_metadata -i4p
+soca portal -i oeg-upm_metadata -o oeg-upm_portal
+```
+
+This is an example of a single card using the command:
+
+```bash
+soca card -i https://github.com/oeg-upm/soca --png
+```
+
+
+
+## Requirements
+
+* Git
+* Python 3.10
+
+## Install from GitHub
+
+```bash
+git clone https://github.com/oeg-upm/soca
+cd soca
+pip install -e .
+```
+Highly recommended steps:
+
+```bash
+somef configure
+```
+Alternatively you may run the _installer.sh_ file which will also configure SOMEF, just edit it to it for your needs.
+
+And you will be asked to provide the following:
+
+* A GitHub authentication token [**optional, leave blank if not used**], which SOMEF uses to retrieve metadata from GitHub. If you don't include an authentication token, you can still use SOMEF. However, you may be limited to a series of requests per hour. For more information, see [https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line)
+
+* The path to the trained classifiers (pickle files). If you have your own classifiers, you can provide them here. Otherwise, you can leave it blank
+
+### InfluxDB setup
+For SOCA-Dash to work you will need to have a working version of influx 2.+ as well as grafana on your machine. SOCA-Dash needs two datasources and requires tokens to be able to access the influxDB datasources.
+For more information please visit: https://docs.influxdata.com/influxdb/cloud/reference/cli/influx/auth/create/
+
+To generate a token:
+```
+influx auth create -o [organisation name] --all-access
+```
+
+SOCA-Dash requires influxQL datasource connection within grafana.
+To ensure that influx 2.+ allows influxQL queries execute the following:
+```
+influx v1 dbrp create --db [Bucket Name] -rp 0 --bucket-id [Bucket-id]
+```
+You also need to create a v1 authentication:
+```
+influx v1 auth create \
+ --read-bucket [Bucket-id] \
+ --write-bucket [Bucket-id] \
+ --username admin
+ ```
+Once the influx has been setup and token created please ensure that SOCA is using said token. Now is a good time to execute the SOCA configure command. Or edit the ./_installer.sh_ file to your needs and executing the script.
+
+## Install from DockerFile
+
+```bash
+git clone https://github.com/oeg-upm/soca
+cd soca
+```
+SOCA comes with a _installer.sh_ file which will automatically run the SOCA and SOMEF configure commands. Please edit it in accordance to your needs.
+The _installer.sh_ file is necessary for the docker installation process
+
+```
+docker compose up
+```
+Docker compose up starts the grafana and the influxdb within their own container. It also creates its own network: "socaNet"
+You may want to list the containers you have/running:
+```
+docker ps -a
+```
+If you wish to access the influx container to generate a token you will first need to enter the container:
+```
+docker exec -it [influx container id] /bin/bash
+```
+This starts a bash shell for the container. Remember, the container must be running at the time of executing this command.
+
+Once within the container you will need to generate a influx token. The following command will generate a token, you may change the token flags to your needs. Once this command returns a token please copy this into the _installer.sh_ file "databaseToken"
+For more information please visit: https://docs.influxdata.com/influxdb/cloud/reference/cli/influx/auth/create/
+
+To generate a token:
+```
+influx auth create -o [organisation name] --all-access
+```
+
+SOCA-Dash requires influxQL datasource connection within grafana.
+To ensure that influx 2.+ allows influxQL queries execute the following:
+```
+influx v1 dbrp create --db [Bucket Name] -rp 0 --bucket-id [Bucket-id]
+```
+You also need to create a v1 authentication:
+```
+influx v1 auth create \
+ --read-bucket [Bucket-id] \
+ --write-bucket [Bucket-id] \
+ --username admin
+ ```
+Once the influx has been setup and token copied to _installer.sh_ you may feel free to exit the container.
+
+Now we need to build the SOCA container, please ensure you are within the github directory when executing this command:
+Remember, container_run.sh will create a summary for the oeg-upm group, modify to your needs and desires. More information can be found within USAGE
+
+```bash
+docker build -t [INSERT_NAME] .
+```
+Once the container has been built you may execute the SOCA container by running the following:
+
+```bash
+docker run -it --network [network influx is running on] [container name]
+```
+
+## SOCA-Dash
+Once the grafana, influx and soca have been set up correctly you can create a grafana dashboard by importing SOCA-Dash.json. This will allow you to visualise the Summary being uploaded to the influxDB.
+
+You will require to have created 2 influxDB datasources, one for flux queries and another for influxQL. The following are two examples on how to do so.
+
+
+For the token use the one previously created.
+
+For the influxQL follow the example provided below.
+
+
+
+Here you can see you must create custom headers. The key is "Authorization" and the value is the same token used for the flux datasource.
+
+
+
+For the login please use the login created during the influx v1 auth create. For the rest add your org_name and bucket name. If you have used the SOCA defaults you can just copy the image
+
+## Usage
+
+```text
+Usage: soca [OPTIONS] COMMAND [ARGS]...
+
+ SOCA (Software Catalog Creator)
+
+ Automatically generates a searchable portal for every repository of an
+ organization/s or user/s, which is easy to host.
+
+ Usage:
+
+ =. (Configure) Create configuration file for database etc
+ 1. (fetch) Fetch all repos from the desired organization/s
+ 2. (extract) Extract all metadata for every repo
+ 3. (portal) Generate a searchable portal for all the retrieved data
+ 4. (summary) Create a summary from the portal information
+
+Options:
+ -h, --help Show this message and exit.
+
+Commands:
+ card Create a stand-alone card ready to be embedded in a website
+ configure This creates a ~/.soca/configure.ini file
+ extract Fetch and save metadata from introduced repos
+ portal Build a portal with a minimalist design
+ fetch Retrieve all organization/s or user/s repositories
+ summary Create a summary of good practices from portal card data
+```
+
+In order to use SOCA you will need to follow the next steps:
+
+### 1 - Fetch
+
+First thing to do is gather all repositories pointers that we want to use. We'll use the `fetch` command to ease this task.
+
+```text
+ -i, --input Organization or user name [required]
+ -o, --output Repository list output file [default: repos]
+ --org Extracting from a organization [default: True]
+ --user Extracting from a user [default: False]
+ -na, --not_archived Fetch only repos that are not archived
+ [default: False]
+ -nf, --not_forked Fetch only repos that are not forked [default:
+ False]
+ -nd, --not_disabled Fetch only repos that are not disabled
+ [default: False]
+ -h, --help Show this message and exit.
+```
+
+Is important to determine if the name belongs to a user or a organization by using the --user or --org flag, additionally you can specify an output path with the flag -o.
+
+Example:
+
+`soca fetch -i dakixr --user`
+`soca fetch -i oeg-upm --org -o oeg-upm_repos --not_archived`
+
+This command also accepts a file as input (names separated by a new-line) for ingesting multiple names at a time.
+
+Example:
+`soca fetch -i multiple-users.csv --user -o multiple-users_repos`
+`soca fetch -i multiple-orgs.csv --org -o multiple-orgs_repos --not_archived`
+
+The output of this command is a csv file with all the repos of the selected users/orgs.
+At this moment is a good time to clean this file (remove all repos that you don't want to use).
+Note: you can add manually any other repository.
+
+### 2 - Extract
+
+Then we use the `extract` command to extract all the metadata required from each repository. If you want a more in-depth analysis on Python repositories use the flag -i4p or --inspect4py.
+
+```text
+ -i, --input Pointers to the repositories in csv format
+ [required]
+ -o, --output Dir where repositories metadata will be saved
+ -i4p, --inspect4py Use inspect4py to extract additional metadata from
+ Python repositories
+ -h, --help Show this message and exit.
+```
+
+Example:
+`soca extract -i oeg-upm_repos -o oeg-upm_metadata`
+
+### 3 - Portal
+
+This is the last step in the pipeline. For building the portal we need to use the command `portal`, it will take as input the directory created by the command extract.
+
+```text
+ -i, --input
+ Dir repositories metadata in json format
+ [required]
+ -o, --output Dir where Software Catalog Portal will be
+ saved [default: portal]
+ -t, --title Portal's title [default: Software Catalog]
+ -fi, --favicon Portal's favicon [default: img/soca-
+ logo.ico]
+ -h, --help Show this message and exit.
+```
+
+Example:
+`soca portal -i oeg-upm_metadata -o dir_portal --title '[Portal's title]'`
+
+If everything worked fine now a new dir should have been created with all the assets and code to deploy this portal.
+
+### Summary
+SOCA now allows to produce a summary json of a given cards_data.json created by the previous portal step.
+User must decide whether or not to upload (default = false), or to create JSON file for output summary
+For building the summary we need to use the command `summary`
+```
+ -i, --input
+ Dir repositories metadata in json format
+ [required]
+ -o, --output Dir where Software Catalog Portal will be
+ saved [default: summary]
+ -U, --upload Will upload file to influxdb
+```
+Example
+`soca summary -i cards_data.json -o test '`
+
+### Create a stand-alone card
+
+SOCA also gives the option to create a single card in one of two different formats:
+
+* HTML
+* PNG
+
+```text
+ -i, --input Repository URL [required]
+ -o, --output Output file where the html will be saved [default:
+ card]
+ --html Save card as html [default: True]
+ --png Save card as a png [default: False]
+ -h, --help Show this message and exit.
+```
+
+As input you will need a github repository url and use one of the flags: --html or --png.
+Note: if no flag is used the default is html.
+
+Example:
+`soca card -i https://github.com/oeg-upm/soca --html`
+`soca card -i https://github.com/oeg-upm/soca --png`
+
+### Styling the portal
+
+In case you want to change the default style of the portal, SOCA decouples the .css files from the code-base. So in the resulting portal directory there will be two .css files are available for further tinkering and styling to everyone needs.
diff --git a/src/somef/test/test_data/repositories/soca/codemeta.json b/src/somef/test/test_data/repositories/soca/codemeta.json
new file mode 100644
index 00000000..befbce13
--- /dev/null
+++ b/src/somef/test/test_data/repositories/soca/codemeta.json
@@ -0,0 +1,75 @@
+{
+ "@context": "https://w3id.org/codemeta/3.0",
+ "@type": [
+ "SoftwareSourceCode",
+ "SoftwareApplication"
+ ],
+ "license": {
+ "name": "Apache License 2.0",
+ "url": "https://raw.githubusercontent.com/oeg-upm/soca/main/LICENSE",
+ "identifier": "https://spdx.org/licenses/Apache-2.0"
+ },
+ "codeRepository": "https://github.com/oeg-upm/soca",
+ "issueTracker": "https://github.com/oeg-upm/soca/issues",
+ "dateCreated": "2021-09-01",
+ "dateModified": "2025-10-06",
+ "downloadUrl": "https://github.com/oeg-upm/soca/releases",
+ "name": "soca",
+ "logo": "https://raw.githubusercontent.com/oeg-upm/soca/main/doc/images/soca-logo.png",
+ "keywords": [
+ "software",
+ "software-engineering",
+ "software-metadata"
+ ],
+ "programmingLanguage": [
+ "Python",
+ "HTML",
+ "JavaScript",
+ "CSS"
+ ],
+ "softwareRequirements": [
+ {
+ "name": "setuptools",
+ "@type": "SoftwareApplication",
+ "version": ">=42"
+ },
+ {
+ "name": "wheel",
+ "@type": "SoftwareApplication"
+ }
+ ],
+ "continuousIntegration": "https://raw.githubusercontent.com/oeg-upm/soca/main/.github/workflows/pypi-publish.yml",
+ "releaseNotes": "This release is a minor update to fix issues and the integration with the dasboards. See an example here: (https://software.oeg.fi.upm.es).\r\n\r\nThis release contains the version of the tool that has been sent to the MSR 2024 conference",
+ "softwareVersion": "0.0.3",
+ "datePublished": "2023-03-30",
+ "buildInstructions": [
+ "https://raw.githubusercontent.com/oeg-upm/soca/main/README.md"
+ ],
+ "author": [
+ {
+ "@type": "Person",
+ "@id": "https://github.com/dgarijo",
+ "name": "Daniel Garijo"
+ },
+ {
+ "@type": "Person",
+ "name": "Daniel Rodríguez Mariblanca"
+ },
+ {
+ "@type": "Person",
+ "name": "Miguel Arroyo Márquez"
+ },
+ {
+ "@type": "Person",
+ "name": "Esteban Gonzalez"
+ }
+ ],
+ "developmentStatus": "active",
+ "identifier": [
+ "https://doi.org/10.5281/zenodo.10316197"
+ ],
+ "readme": "https://raw.githubusercontent.com/oeg-upm/soca/main/README.md",
+ "description": [
+ "Software Catalog Creator. A repository that given an organization URL, it will create a software catalog for browsing all repositories"
+ ]
+}
\ No newline at end of file
diff --git a/src/somef/test/test_data/repositories/soca/pyproject.toml b/src/somef/test/test_data/repositories/soca/pyproject.toml
new file mode 100644
index 00000000..b5a3c468
--- /dev/null
+++ b/src/somef/test/test_data/repositories/soca/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+ "setuptools>=42",
+ "wheel"
+]
+build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/src/somef/test/test_data/repositories/soca/setup.cfg b/src/somef/test/test_data/repositories/soca/setup.cfg
new file mode 100644
index 00000000..fed9aba0
--- /dev/null
+++ b/src/somef/test/test_data/repositories/soca/setup.cfg
@@ -0,0 +1,45 @@
+[metadata]
+name = soca
+version = attr: soca.__version__
+author = Daniel Rodríguez Mariblanca, Miguel Arroyo Márquez, Esteban Gonzalez, Daniel Garijo
+author_email = daniel.garijo@upm.es
+description = A python package that given an organization/user name, it will create a software catalog for browsing all repositories or just a single repository in a minimalist card.
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/oeg-upm/soca
+classifiers =
+ Programming Language :: Python :: 3
+ Operating System :: OS Independent
+
+[options]
+package_dir =
+ = src
+packages = find:
+include_package_data = True
+python_requires = >= 3.10.0
+install_requires =
+ somef >= 0.9.5
+ requests
+ bs4
+ htmlmin
+ progressbar2
+ mistune
+ Pygments
+ html2image
+ inspect4py
+ click
+ influxdb-client
+ configparser
+ DateTime
+ pathlib
+ nltk
+ configparser
+ influxdb_client
+ validators
+
+[options.packages.find]
+where = src
+
+[options.entry_points]
+console_scripts =
+ soca = soca.__main__:cli
\ No newline at end of file
diff --git a/src/somef/test/test_setupcfg_parser.py b/src/somef/test/test_setupcfg_parser.py
new file mode 100644
index 00000000..142fd2ce
--- /dev/null
+++ b/src/somef/test/test_setupcfg_parser.py
@@ -0,0 +1,56 @@
+import unittest
+import os
+from pathlib import Path
+from somef.process_results import Result
+from somef.parser.setupcfg_parser import parse_setup_cfg
+from ..utils import constants
+
+test_data_path = str(Path(__file__).parent / "test_data") + os.path.sep
+test_data_repositories = str(Path(__file__).parent / "test_data" / "repositories") + os.path.sep
+
+
+class TestSetupCfgParser(unittest.TestCase):
+    """Tests for the setup.cfg parser."""
+
+    def test_issue_988(self):
+        """
+        Checks that the soca setup.cfg fixture is parsed into package file,
+        package id, author, homepage, requirements and runtime platform results.
+        """
+        cfg_path = os.path.sep.join([test_data_repositories, "soca", "setup.cfg"])
+        parsed = parse_setup_cfg(cfg_path, Result(), cfg_path).results
+
+        def first_result(category, missing_message):
+            # Fail with the given message when the category has no entries.
+            entries = parsed.get(category, [])
+            self.assertTrue(len(entries) > 0, missing_message)
+            return entries[0]["result"]
+
+        package_file = first_result(constants.CAT_HAS_PACKAGE_FILE, "No package file info found")
+        self.assertEqual(package_file["value"], cfg_path)
+        self.assertEqual(package_file["type"], constants.URL)
+
+        package_id = first_result(constants.CAT_PACKAGE_ID, "No package id found")
+        self.assertEqual(package_id["value"], "soca")
+        self.assertEqual(package_id["type"], constants.STRING)
+
+        author = first_result(constants.CAT_AUTHORS, "No author found")
+        self.assertEqual(author["email"], "daniel.garijo@upm.es")
+        self.assertEqual(author["type"], constants.AGENT)
+
+        homepage = first_result(constants.CAT_HOMEPAGE, "No homepage found")
+        self.assertEqual(homepage["value"], "https://github.com/oeg-upm/soca")
+        self.assertEqual(homepage["type"], constants.URL)
+
+        requirements = parsed.get(constants.CAT_REQUIREMENTS, [])
+        self.assertTrue(len(requirements) > 0, "No dependencies found")
+        found_dep = any(
+            entry["result"].get("name") == "somef"
+            and entry["result"].get("dependency_type") == constants.DEPENDENCY_TYPE_RUNTIME
+            for entry in requirements
+        )
+        self.assertTrue(found_dep, "Dependency 'somef' not found")
+
+        runtime = first_result(constants.CAT_RUNTIME_PLATFORM, "No runtime platform found")
+        self.assertEqual(runtime["name"], "Python")
+        self.assertEqual(runtime["version"], ">= 3.10.0")
\ No newline at end of file